/* * Revision Control Information * * $Source: /vol/opua/opua2/sis/sis-1.2/common/src/sis/map/RCS/fanout_est.c,v $ * $Author: sis $ * $Revision: 1.6 $ * $Date: 1993/08/05 18:22:27 $ * */ /* file @(#)fanout_est.c 1.4 */ /* last modified on 7/2/91 at 19:44:23 */ /* * $Log: fanout_est.c,v $ * Revision 1.6 1993/08/05 18:22:27 sis * Bug fix : now handles libraries without 2-nand. * * Revision 1.2 1993/08/05 16:14:32 cmoon * need to watch out for library without 2-NAND * * Revision 1.1 1993/08/05 15:34:20 cmoon * Initial revision * * Revision 1.3 1992/05/06 18:55:51 sis * SIS release 1.1 * * Revision 1.2 1992/04/17 21:53:13 sis * *** empty log message *** * * Revision 1.1 92/01/08 17:40:37 sis * Initial revision * * Revision 1.2 91/07/02 10:53:16 touati * lint cleanup. * * Revision 1.1 91/06/30 16:38:50 touati * Initial revision * */ #include "sis.h" #include "map_macros.h" #include "map_int.h" #include "bin_int.h" #include "lib_int.h" #include "fanout_int.h" #include "fanout_delay.h" #include "fanout_est_static.h" /* this file takes as input an abstract specification */ /* of a fanout tree and returns a data structure directly usable */ /* for bin_delay.c to evaluate arrival times at tree leaves. */ /* here are the routines this file is expected to implement */ /* * * * void fanout_est_get_prim_pin_fanout(prim, pin, pin_info) * prim_t *prim; * int pin; * pin_info_t *pin_info; * Given a prim describing a match and a pin number, figures out * the leaf (== source), the fanout index and the polarity wrt to the source * of a multiple fanout point. If the source is not a multiple fanout point * everything is set up to NIL values. The prim should be currently * within a match (i.e. its bindings should hold). * * void fanout_est_compute_fanout_info(node) * node_t *node; * Given a node that is asserted to be multiple fanout, we compute the * information stored in BIN(node)->fanout used to estimate delays at * a multiple fanout point. The routine fanout_est_get_est_fanout_leaves * does most of the work. If a fanout tree is found at MAP(node)->fanout_tree * it is used, otherwise a dummy one is made up and used. * * fanout_bucket_t *fanout_est_fanout_bucket_alloc(info) * multi_fanout_t *info; * Simply allocates a new bucket and links it to a linked list * whose head is kept in info. Why? To free memory when done. * * void fanout_est_free_fanout_info(node) * node_t *node; * Does not actually free all the storage associated with BIN(node)->fanout. * Only frees the storage allocated for the fanout buckets with calls to * fanout_est_fanout_bucket_alloc. * * delay_time_t fanout_est_get_pin_arrival_time(pin_info, pin_load) * pin_info_t *pin_info; * double pin_load; * Asserts that the pin_info corresponds to a pin fed by a leaf (a source) * of fanout > 1. Takes the leaf, that should have BIN(leaf)->info already * computed for it, to estimate the arrival time a the pin described by pin_info. * This routine is the main functionality provided by "fanout_est.c". * */ /* EXTERNAL INTERFACE */ /* given a prim describing a match, and a pin, figure out the leaf and the fanout index */ /* tracks down the first multiple fanout point connected to input of that gate */ /* through inverters only. If none, set pin_info->leaf to NIL. If there is one, keep in input_fanout */ /* the fanout from where it came. If not given, need to use some more complicated technique */ /* to figure out the correct fanout_index. */ /* WARNING: if prim == NIL, means a PO */ void fanout_est_get_prim_pin_fanout(prim, pin, pin_info) prim_t *prim; int pin; pin_info_t *pin_info; { node_t *input; node_t *input_fanout; int polarity; if (prim == NIL(prim_t)) { input_fanout = pin_info->input; assert(input_fanout->type == PRIMARY_OUTPUT); input = node_get_fanin(input_fanout, 0); } else { assert(pin >= 0 && pin < prim->ninputs); input = pin_info->input; input_fanout = fanout_est_get_pin_fanout_node(prim, pin); } polarity = POLAR_X; while (node_function(input) == NODE_INV && node_num_fanout(input) == 1) { input_fanout = input; input = node_get_fanin(input, 0); polarity = POLAR_INV(polarity); } if (node_num_fanout(input) == 1) { /* no multiple fanout point encountered: stop */ pin_info->leaf = NIL(node_t); pin_info->fanout_index = -1; pin_info->pin_polarity = -1; } else { pin_info->leaf = input; pin_info->fanout_index = fanout_est_get_fanout_index(input, input_fanout); if (polarity == POLAR_X) { pin_info->pin_polarity = BIN(input)->fanout->polarity; } else { pin_info->pin_polarity = POLAR_INV(BIN(input)->fanout->polarity); } } } /* * Given a node that is asserted to be multiple fanout, we compute the * information stored in BIN(node)->fanout used to estimate delays at * a multiple fanout point. The routine fanout_est_get_est_fanout_leaves * does most of the work. If a fanout tree is found at MAP(node)->fanout_tree * it is used, otherwise a dummy one is made up and used. */ static bin_global_t *global_options; void fanout_est_compute_fanout_info(node, options) node_t *node; bin_global_t *options; { int polarity; node_t *source; delay_bucket_t *bucket; pin_info_t pin_info; lib_gate_t *default_inv; int n_fanouts = node_num_fanout(node); global_options = options; assert(n_fanouts > 1 && BIN(node)->fanout != NIL(multi_fanout_t)); fanout_est_get_est_fanout_leaves(node); source = MAP(node)->fanout_source; if (options->allow_duplication && BIN(node)->fanout->polarity == POLAR_Y) { /* * If node is POLAR_X, there is no duplication possible: node is PI or constant. * In that case, we skip this part of the code. * Otherwise, if we allow duplication, it means we allow a tree to be duplicated * to provide a signal in both phases (X and X'). Only one tree producing X * and one tree producing X' is authorized. * To get that effect, we just let the algorithm selects the gate that * seems most appropriate. No forcing of inverters. * The only effect of "best_is_inverter" on bin_delay.c is to allow, if set * the use of the fanin of node as a possible source. Here we overload * "best_is_inverter" to get the effect of duplication. * NOTE: we need to assign a reasonable load to both sources in that case, * otherwise the default (+ INFINITY) is used, and large inverters are forced * everywhere, meaning no logic duplication anywhere. */ BIN(node)->fanout->best_is_inverter = 1; /* CASE 1: full load as predicted by load heuristic on both trees */ polarity = BIN(node)->fanout->polarity; if (source == node) { BIN(node)->fanout->loads[POLAR_INV(polarity)] = BIN(node)->fanout->loads[polarity]; } else { BIN(node)->fanout->loads[polarity] = BIN(node)->fanout->loads[POLAR_INV(polarity)]; } /* CASE 2: no load: // BIN(node)->fanout->loads[POLAR_X] = 0.0; BIN(node)->fanout->loads[POLAR_Y] = 0.0; */ /* CASE 3: half load: // polarity = BIN(node)->fanout->polarity; if (source == node) { load = BIN(node)->fanout->loads[polarity]; } else { load = BIN(node)->fanout->loads[POLAR_INV(polarity)]; } assert(load < INFINITY); BIN(node)->fanout->loads[POLAR_INV(polarity)] = load / 2; BIN(node)->fanout->loads[polarity] = load / 2; */ /* CASE 4: force the selections of non inverters // polarity = BIN(node)->fanout->polarity; if (node_num_fanin(node) > 0) { assert(polarity == POLAR_Y); if (source == node) { load = BIN(node)->fanout->loads[polarity]; } else { load = BIN(node)->fanout->loads[POLAR_INV(polarity)]; } if (source == node) { source = node_get_fanin(node, 0); BIN(node)->fanout->loads[POLAR_INV(polarity)] = fanout_est_select_non_inverter(source, load); } else { BIN(node)->fanout->loads[polarity] = fanout_est_select_non_inverter(node, load); } } */ } else if (source == node) { /* * node may be NODE_Y or the source itself (if PI or constant feeding a PO) * in any case, its load has already been computed in fanout_est_get_est_fanout_leaves * as the load of the fanout tree. * the value of the other load is irrelevant. */ BIN(node)->fanout->best_is_inverter = 0; } else { assert(BIN(node)->fanout->polarity == POLAR_Y); assert(node_function(node) == NODE_INV); BIN(node)->fanout->best_is_inverter = 1; /* * node Y is mapped to an inverter * modify BIN(node)->pwl in order to make sure to get an inverter at that node. * the correct BIN(node)->load[POLAR_X] was computed in fanout_est_get_est_fanout_leaves. */ pwl_free(BIN(node)->pwl); pin_info.leaf = NIL(node_t); pin_info.input = node_get_fanin(node, 0); BIN(node)->pwl = pwl_create_empty(); default_inv = lib_get_default_inverter(); bucket = bin_delay_compute_gate_bucket(default_inv, 1, &pin_info); bin_delay_update_node_pwl(node, bucket); BIN(node)->fanout->loads[POLAR_Y] = 0.0; } } /* alloc's a bucket and put it on the fanout_bucket list. */ fanout_bucket_t *fanout_est_fanout_bucket_alloc(info) multi_fanout_t *info; { fanout_bucket_t *result = ALLOC(fanout_bucket_t, 1); result->next = info->buckets; info->buckets = result; return result; } /* * Simply allocates a new bucket and links it to a linked list * whose head is kept in info. Why? To free memory when done. */ void fanout_est_free_fanout_info(node) node_t *node; { assert(BIN(node)->fanout != NIL(multi_fanout_t)); fanout_bucket_free(BIN(node)->fanout); } /* basically, find the right pwl in the fanout structure for that polarity, */ /* that index and that load. Compensate for the load and returns the arrival time */ /* ASSERTION: this pwl corresponds to the selection of a single gate here. */ /* EXTENSIONS: check that pin_load <= leaf->load and otherwise return +infty */ /* this will be done for the second phase, when we iterate the code. */ delay_time_t fanout_est_get_pin_arrival_time(pin_info, pin_load) pin_info_t *pin_info; double pin_load; { double load; fanout_leaf_t *leaf; assert(pin_info->leaf != NIL(node_t)); assert(pin_info->pin_polarity == POLAR_X || pin_info->pin_polarity == POLAR_Y); assert(pin_info->fanout_index >= 0 && pin_info->fanout_index < node_num_fanout(pin_info->leaf)); leaf = &(BIN(pin_info->leaf)->fanout->leaves[pin_info->pin_polarity][pin_info->fanout_index]); load = leaf->bucket->load + (pin_load - leaf->load); return bin_delay_compute_delay_pwl_delay(leaf->bucket->pwl, load); } /* INTERNAL INTERFACE */ /* compute the fanout index of each pin, wrt to multiple fanout nodes */ /* if node not multiple fanout, no information is computed */ /* the real work is done by fanout_est_get_prim_pin_fanout */ static int get_fanin_fanout_indices(prim) prim_t *prim; { int i; node_t *root; pin_info_t pin_info; root = map_prim_get_root(prim); for (i = 0; i < prim->ninputs; i++) { if (MAP(root)->save_binding[i] != prim->inputs[i]->binding) return 1; } for (i = 0; i < prim->ninputs; i++) { pin_info.input = MAP(root)->save_binding[i]; fanout_est_get_prim_pin_fanout(prim, i, &pin_info); MAP(root)->pin_info[i].pin_source = pin_info.leaf; MAP(root)->pin_info[i].fanout_index = pin_info.fanout_index; } prim->data = (char *) -1; return 0; } /* * this routine is used when the pin of the prim (i.e. prim->inputs[pin]->binding) * is a multiple fanout node_t node. We need to get the fanout of that node * that is matched in the prim. This routine does that. The prim should be active: * i.e. the call should be made only within an active gen_all_matches. */ static node_t *fanout_est_get_pin_fanout_node(prim, pin) prim_t *prim; int pin; { prim_node_t *node; prim_edge_t *edge; node = prim->inputs[pin]; assert(node->nfanout == 1); for (edge = prim->edges; edge != NIL(prim_edge_t); edge = edge->next) { if (edge->this_node == node) return edge->connected_node->binding; } return NIL(node_t); } /* * given a node and one of its fanouts, figures out which fanout index * the fanout corresponds to. Needs to have BIN(node)->fanout allocated for that. */ static int fanout_est_get_fanout_index(node, fanout) node_t *node; node_t *fanout; { int fanout_index; assert(BIN(node)->fanout != NIL(multi_fanout_t)); assert(st_lookup_int(BIN(node)->fanout->index_table, (char *) fanout, &fanout_index)); return fanout_index; } /* at this point, there is one tree that can service half of all possible leaves */ /* the other half are those with reverse polarity */ /* We first fill the delay information for leaves that are directly connected to this tree */ /* then we add delay information for the reversed polarity leaves simply by adding an inverter */ static void fanout_est_get_est_fanout_leaves(node) node_t *node; { node_t *source; array_t *fanout_tree; int source_polarity; if (MAP(node)->fanout_tree == NIL(array_t)) { fanout_est_create_dummy_fanout_tree(node); } source = MAP(node)->fanout_source; fanout_tree = MAP(node)->fanout_tree; assert(source != NIL(node_t)); assert(fanout_tree != NIL(array_t)); source_polarity = fanout_est_get_source_polarity(node, source); fanout_delay_add_pwl_source(source, source_polarity); fanout_tree_extract_fanout_leaves(BIN(node)->fanout, fanout_tree); BIN(node)->fanout->loads[source_polarity] = fanout_tree_get_source_load(fanout_tree); fanout_delay_free_sources(); fanout_tree_free(fanout_tree, 1); MAP(node)->fanout_tree = NIL(array_t); fanout_est_extract_remaining_fanout_leaves(BIN(node)->fanout); } /* * First, create a fanout info representing the problem. * We are conservative here: create it with n the number of fanouts * We try both polarities, but separately, one at a time. And a take the source * that is globally the better one. * As load, we use the load of the first input pin of the smallest NAND2 gate * As required time, (0.0, 0.0) * We then try to build the best fanout tree for that, trying both sources * and recording the best result in BIN(node)->fanout. */ static void fanout_est_create_dummy_fanout_tree(node) node_t *node; { int p, q; int best_source_polarity; int best_sink_polarity; opt_array_t fanout_info[POLAR_MAX][POLAR_MAX]; node_t *sources[POLAR_MAX]; fanout_cost_t costs[POLAR_MAX][POLAR_MAX]; array_t *trees[POLAR_MAX][POLAR_MAX]; fanout_cost_t total_cost[POLAR_MAX]; fanout_alg_t alg; if (node_num_fanin(node) > 0) { sources[POLAR_X] = node_get_fanin(node, 0); sources[POLAR_Y] = node; } else { sources[POLAR_X] = node; sources[POLAR_Y] = NIL(node_t); } if (global_options->load_estimation == 1) { noalg_init(NIL(network_t), &alg); } else { balanced_init(NIL(network_t), &alg); } foreach_polarity(p) { fanout_est_compute_dummy_fanout_info(node, fanout_info[p], p); foreach_polarity(q) { trees[p][q] = fanout_tree_alloc(); costs[p][q].slack = MINUS_INFINITY; costs[p][q].area = INFINITY; if (sources[q] == NIL(node_t)) continue; fanout_delay_add_pwl_source(sources[q], q); assert((*alg.optimize)(fanout_info[p], trees[p][q], &costs[p][q])); fanout_tree_add_edges(trees[p][q]); fanout_delay_free_sources(); } } /* * take the source with the best performance */ foreach_polarity(q) { if (fanout_opt_is_better_cost(&costs[POLAR_X][q], &costs[POLAR_Y][q])) { total_cost[q] = costs[POLAR_X][q]; } else { total_cost[q] = costs[POLAR_Y][q]; } } if (fanout_opt_is_better_cost(&total_cost[POLAR_X], &total_cost[POLAR_Y])) { best_source_polarity = POLAR_X; } else { best_source_polarity = POLAR_Y; } MAP(node)->fanout_source = sources[best_source_polarity]; /* * for that better source, choose the better polarity: keep that tree only */ if (fanout_opt_is_better_cost(&costs[POLAR_X][best_source_polarity], &costs[POLAR_Y][best_source_polarity])) { best_sink_polarity = POLAR_X; } else { best_sink_polarity = POLAR_Y; } MAP(node)->fanout_tree = trees[best_sink_polarity][best_source_polarity]; fanout_tree_save_links(MAP(node)->fanout_tree); foreach_polarity(p) { foreach_polarity(q) { fanout_info_free(&fanout_info[p][q], 1); if (p == best_sink_polarity && q == best_source_polarity) continue; fanout_tree_free(trees[p][q], 0); } } } /* polarity rule: if multi fanout source is INV -> POLAR_Y */ /* otherwise it gets POLAR_X */ static int fanout_est_get_source_polarity(fanout_point, source) node_t *fanout_point; node_t *source; { int fanout_polarity; assert(node_num_fanout(fanout_point) > 1); fanout_polarity = BIN(fanout_point)->fanout->polarity; return (source == fanout_point) ? fanout_polarity : POLAR_INV(fanout_polarity); } static void fanout_est_extract_remaining_fanout_leaves(fanout) multi_fanout_t *fanout; { int i, p; fanout_leaf_t *leaf[POLAR_MAX]; int n_fanouts = fanout->n_fanouts; int needed_polarity; int existing_leaves; for (i = 0; i < n_fanouts; i++) { existing_leaves = 0; foreach_polarity(p) { leaf[p] = &(fanout->leaves[p][i]); if (leaf[p]->bucket != NIL(fanout_bucket_t)) { existing_leaves++; } } assert(existing_leaves == 1); needed_polarity = (leaf[POLAR_X]->bucket == NIL(fanout_bucket_t)) ? POLAR_X : POLAR_Y; fanout_est_extend_fanout_leaf(fanout, leaf[needed_polarity], leaf[POLAR_INV(needed_polarity)]); } } /* * put in the links entries of the form: (NIL(node_t), fanout_index) * this kind of sinks is recognized by fanout_est_get_pin_fanout_index. */ static void fanout_est_compute_dummy_fanout_info(node, fanout_info, polarity) node_t *node; opt_array_t *fanout_info; int polarity; { int i, p; int n_fanouts; lib_gate_t *default_nand2; double default_load; gate_link_t link; gate_link_t *link_ptr; n_fanouts = BIN(node)->fanout->n_fanouts; default_nand2 = choose_smallest_gate(lib_get_library(), "f=!(a*b);"); if (default_nand2 == NIL(lib_gate_t)) { default_nand2 = choose_smallest_gate(lib_get_library(), "f=(a*b);"); if (default_nand2 == NIL(lib_gate_t)) { default_nand2 = choose_smallest_gate(lib_get_library(), "f=!(a+b);"); if (default_nand2 == NIL(lib_gate_t)) { default_nand2 = choose_smallest_gate(lib_get_library(), "f=(a+b);"); } } } assert(default_nand2 != NIL(lib_gate_t)); default_load = delay_get_load(default_nand2->delay_info[0]); link.node = NIL(node_t); link.pin = 0; link.load = default_load; link.slack = 0.0; link.required = ZERO_DELAY; foreach_polarity(p) { fanout_info[p].links = st_init_table(st_ptrcmp, st_ptrhash); if (p == polarity) { for (i = 0; i < n_fanouts; i++) { link_ptr = ALLOC(gate_link_t, 1); *link_ptr = link; link_ptr->pin = i; st_insert(fanout_info[p].links, (char *) link_ptr, NIL(char)); } } fanout_info_preprocess(&fanout_info[p]); } } static void fanout_est_extend_fanout_leaf(fanout, new_leaf, old_leaf) multi_fanout_t *fanout; fanout_leaf_t *new_leaf; fanout_leaf_t *old_leaf; { int gate_index; lib_gate_t *gate; double load; pin_info_t pin_info; delay_pwl_t best_pwl; delay_pwl_t pwl; delay_time_t arrival, best_arrival; n_gates_t n_gates; new_leaf->load = old_leaf->load; new_leaf->bucket = fanout_est_fanout_bucket_alloc(fanout); new_leaf->bucket->load = old_leaf->load; if (global_options->ignore_polarity) { best_pwl.rise = pwl_dup(old_leaf->bucket->pwl.rise); best_pwl.fall = pwl_dup(old_leaf->bucket->pwl.fall); } else { best_pwl.rise = pwl_create_empty(); best_pwl.fall = pwl_create_empty(); best_arrival = PLUS_INFINITY; n_gates = fanout_delay_get_n_gates(); foreach_inverter(n_gates, gate_index) { gate = fanout_delay_get_gate(gate_index); load = fanout_delay_get_buffer_load(gate_index); load += old_leaf->bucket->load - old_leaf->load; pin_info.arrival = bin_delay_compute_delay_pwl_delay(old_leaf->bucket->pwl, load); pwl = bin_delay_compute_gate_pwl(gate, 1, &pin_info); arrival = bin_delay_compute_delay_pwl_delay(pwl, old_leaf->load); if (GETMAX(arrival) < GETMAX(best_arrival)) { pwl_free(best_pwl.rise); pwl_free(best_pwl.fall); best_pwl = pwl; best_arrival = arrival; } else { pwl_free(pwl.rise); pwl_free(pwl.fall); } } } new_leaf->bucket->pwl = best_pwl; } /* free all the buckets associated with a multi_fanout_t structure */ static void fanout_bucket_free(info) multi_fanout_t *info; { fanout_bucket_t *head; fanout_bucket_t *next; for (head = info->buckets; head != NIL(fanout_bucket_t); head = next) { pwl_free(head->pwl.rise); pwl_free(head->pwl.fall); next = head->next; FREE(head); } } /* given a node and a load, figure out the closest load to "load" */ /* that selects a gate that is not an inverter */ static double fanout_est_select_non_inverter(node, load) node_t *node; double load; { int i; double best_load; double distance; double best_distance; pwl_point_t *point; delay_bucket_t *bucket; pwl_t *pwl = BIN(node)->pwl; /* first check whether any load correction is necessary */ point = pwl_lookup(pwl, load); bucket = (delay_bucket_t *) pwl_point_data(point); if (bucket->ninputs > 1) return load; /* if it is, limit the damage */ best_load = load; best_distance = INFINITY; for (i = 0; i < pwl->n_points; i++) { bucket = (delay_bucket_t *) pwl->points[i].data; if (bucket->ninputs > 1) { distance = load - pwl->points[i].x; if (distance < 0) distance = - distance; if (distance < best_distance) { best_load = pwl->points[i].x; } } } return best_load; }