d0/d67/route__timing_8c_source.html

00001 #include <stdio.h>
00002 #include <math.h>
00003 #include "util.h"
00004 #include "vpr_types.h"
00005 #include "globals.h"
00006 #include "mst.h"
00007 #include "route_export.h"
00008 #include "route_common.h"
00009 #include "route_tree_timing.h"
00010 #include "route_timing.h"
00011 #include "heapsort.h"
00012 #include "path_delay.h"
00013 #include "net_delay.h"
00014 #include "stats.h"
00015
00016 /******************** Subroutines local to route_timing.c ********************/
00017
00018 static int get_max_pins_per_net(void);
00019
00020 static void add_route_tree_to_heap(t_rt_node * rt_node,
00021                                    int target_node,
00022                                    float target_criticality,
00023                                    float astar_fac);
00024
00025 static void timing_driven_expand_neighbours(struct s_heap *current,
00026                                             int inet,
00027                                             float bend_cost,
00028                                             float criticality_fac,
00029                                             int target_node,
00030                                             float astar_fac,
00031                                                 int highfanout_rlim);
00032
00033 static float get_timing_driven_expected_cost(int inode,
00034                                              int target_node,
00035                                              float criticality_fac,
00036                                              float R_upstream);
00037
00038 static int get_expected_segs_to_target(int inode,
00039                                        int target_node,
00040                                        int *num_segs_ortho_dir_ptr);
00041
00042 static void update_rr_base_costs(int inet,
00043                                  float largest_criticality);
00044
00045 static void timing_driven_check_net_delays(float **net_delay);
00046
00047 static int mark_node_expansion_by_bin(int inet, int target_node, t_rt_node * rt_node);
00048
00049
00050 /************************ Subroutine definitions *****************************/
00051
00052 /* Timing-driven routing algorithm.  The timing graph (includes net_slack)
00053  * must have already been allocated, and net_delay must have been allocated.
00054  * @returns TRUE if the routing succeeds, FALSE otherwise.
00055  */
00056 boolean
00057 try_timing_driven_route(struct s_router_opts router_opts,
00058                         float **net_slack,
00059                         float **net_delay,
00060                         t_ivec ** clb_opins_used_locally)
00061 {
00062
00063     int itry, inet, ipin, i;
00064     boolean success, is_routable, rip_up_local_opins;
00065     float *pin_criticality;     /* [1..max_pins_per_net-1]. */
00066     int *sink_order;            /* [1..max_pins_per_net-1]. */
00067     t_rt_node **rt_node_of_sink;        /* [1..max_pins_per_net-1]. */
00068     float T_crit, pres_fac;
00069
00070         float *sinks;
00071         int *net_index;
00072
00073         int bends;
00074     int wirelength, total_wirelength, available_wirelength;
00075     int segments;
00076
00077         sinks = my_malloc(sizeof(float) * num_nets);
00078         net_index = my_malloc(sizeof(int) * num_nets);
00079         for(i = 0; i < num_nets; i++) {
00080                 sinks[i] = clb_net[i].num_sinks;
00081                 net_index[i] = i;
00082         }
00083         heapsort(net_index, sinks, num_nets, 1);
00084
00085     alloc_timing_driven_route_structs(&pin_criticality, &sink_order,
00086                                       &rt_node_of_sink);
00087
00088 /* First do one routing iteration ignoring congestion and marking all sinks  *
00089  * on each net as critical to get reasonable net delay estimates.            */
00090
00091     for(inet = 0; inet < num_nets; inet++)
00092         {
00093             if(clb_net[inet].is_global == FALSE)
00094                 {
00095                     for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
00096                         net_slack[inet][ipin] = 0.;
00097                 }
00098             else
00099                 {               /* Set delay of global signals to zero. */
00100                     for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
00101                         net_delay[inet][ipin] = 0.;
00102                 }
00103         }
00104
00105     T_crit = 1.;
00106     pres_fac = router_opts.first_iter_pres_fac; /* Typically 0 -> ignore cong. */
00107
00108     for(itry = 1; itry <= router_opts.max_router_iterations; itry++)
00109         {
00110
00111             for(i = 0; i < num_nets; i++)
00112                 {
00113                         inet = net_index[i];
00114                     if(clb_net[inet].is_global == FALSE)
00115                         {       /* Skip global nets. */
00116
00117                             is_routable =
00118                                 timing_driven_route_net(inet, pres_fac,
00119                                                         router_opts.
00120                                                         max_criticality,
00121                                                         router_opts.
00122                                                         criticality_exp,
00123                                                         router_opts.astar_fac,
00124                                                         router_opts.bend_cost,
00125                                                         net_slack[inet],
00126                                                         pin_criticality,
00127                                                         sink_order,
00128                                                         rt_node_of_sink,
00129                                                         T_crit,
00130                                                         net_delay[inet]);
00131
00132                             /* Impossible to route? (disconnected rr_graph) */
00133
00134                             if(!is_routable)
00135                                 {
00136                                     printf("Routing failed.\n");
00137                                     free_timing_driven_route_structs
00138                                         (pin_criticality, sink_order,
00139                                          rt_node_of_sink);
00140                                         free(net_index);
00141                                         free(sinks);
00142                                     return (FALSE);
00143                                 }
00144                         }
00145                 }
00146
00147
00148                 if(itry == 1) {
00149                         /* Early exit code for cases where it is obvious that a successful route will not be found
00150                            Heuristic: If total wirelength used in first routing iteration is X% of total available wirelength, exit
00151                         */
00152                         total_wirelength = 0;
00153                         available_wirelength = 0;
00154
00155                         for(i = 0; i < num_rr_nodes; i++) {
00156                                 if(rr_node[i].type == CHANX || rr_node[i].type == CHANY)
00157                                 {
00158                                         available_wirelength += 1 + rr_node[i].xhigh - rr_node[i].xlow +
00159                                         rr_node[i].yhigh - rr_node[i].ylow;
00160                                 }
00161                         }
00162
00163                         for(inet = 0; inet < num_nets; inet++)
00164                         {
00165                                 if(clb_net[inet].is_global == FALSE && clb_net[inet].num_sinks != 0)
00166                                 {               /* Globals don't count. */
00167                                         get_num_bends_and_length(inet, &bends, &wirelength,
00168                                                                  &segments);
00169
00170                                         total_wirelength += wirelength;
00171                                 }
00172                         }
00173                         printf("wirelength after first iteration %d, total available wirelength %d, ratio %g\n", total_wirelength, available_wirelength, (float)(total_wirelength)/(float)(available_wirelength));
00174                         if((float)(total_wirelength)/(float)(available_wirelength) > FIRST_ITER_WIRELENTH_LIMIT) {
00175                                 printf("Wirelength usage ratio exceeds limit of %g, fail routing\n", FIRST_ITER_WIRELENTH_LIMIT);
00176                                  free_timing_driven_route_structs
00177      (pin_criticality, sink_order,
00178       rt_node_of_sink);
00179                                 free(net_index);
00180                                 free(sinks);
00181                                 return FALSE;
00182                         }
00183                 }
00184
00185
00186             /* Make sure any CLB OPINs used up by subblocks being hooked directly     *
00187              * to them are reserved for that purpose.                                 */
00188
00189             if(itry == 1)
00190                 rip_up_local_opins = FALSE;
00191             else
00192                 rip_up_local_opins = TRUE;
00193
00194             reserve_locally_used_opins(pres_fac, rip_up_local_opins,
00195                                        clb_opins_used_locally);
00196
00197             /* Pathfinder guys quit after finding a feasible route. I may want to keep *
00198              * going longer, trying to improve timing.  Think about this some.         */
00199
00200             success = feasible_routing();
00201             if(success)
00202                 {
00203                     printf
00204                         ("Successfully routed after %d routing iterations.\n",
00205                          itry);
00206                     free_timing_driven_route_structs(pin_criticality,
00207                                                      sink_order,
00208                                                      rt_node_of_sink);
00209 #ifdef DEBUG
00210                     timing_driven_check_net_delays(net_delay);
00211 #endif
00212                         free(net_index);
00213                         free(sinks);
00214                     return (TRUE);
00215                 }
00216
00217             if(itry == 1)
00218                 {
00219                     pres_fac = router_opts.initial_pres_fac;
00220                     pathfinder_update_cost(pres_fac, 0.);       /* Acc_fac=0 for first iter. */
00221                 }
00222             else
00223                 {
00224                     pres_fac *= router_opts.pres_fac_mult;
00225
00226                         /* Avoid overflow for high iteration counts, even if acc_cost is big */
00227                         pres_fac = min (pres_fac, HUGE_FLOAT / 1e5);
00228
00229                     pathfinder_update_cost(pres_fac, router_opts.acc_fac);
00230                 }
00231
00232             /* Update slack values by doing another timing analysis.                 *
00233              * Timing_driven_route_net updated the net delay values.                 */
00234
00235             load_timing_graph_net_delays(net_delay);
00236             T_crit = load_net_slack(net_slack, 0);
00237             printf("T_crit: %g.\n", T_crit);
00238                 fflush(stdout);
00239         }
00240
00241     printf("Routing failed.\n");
00242     free_timing_driven_route_structs(pin_criticality, sink_order,
00243                                      rt_node_of_sink);
00244         free(net_index);
00245         free(sinks);
00246     return (FALSE);
00247 }
00248
00249
00250 /** Allocates all the structures needed only by the timing-driven router.   */
00251 void
00252 alloc_timing_driven_route_structs(float **pin_criticality_ptr,
00253                                   int **sink_order_ptr,
00254                                   t_rt_node *** rt_node_of_sink_ptr)
00255 {
00256     int max_pins_per_net;
00257     float *pin_criticality;
00258     int *sink_order;
00259     t_rt_node **rt_node_of_sink;
00260
00261     max_pins_per_net = get_max_pins_per_net();
00262
00263     pin_criticality =
00264         (float *)my_malloc((max_pins_per_net - 1) * sizeof(float));
00265     *pin_criticality_ptr = pin_criticality - 1; /* First sink is pin #1. */
00266
00267     sink_order = (int *)my_malloc((max_pins_per_net - 1) * sizeof(int));
00268     *sink_order_ptr = sink_order - 1;
00269
00270     rt_node_of_sink = (t_rt_node **) my_malloc((max_pins_per_net - 1) *
00271                                                sizeof(t_rt_node *));
00272     *rt_node_of_sink_ptr = rt_node_of_sink - 1;
00273
00274     alloc_route_tree_timing_structs();
00275 }
00276
00277
00278 /** Frees all the stuctures needed only by the timing-driven router.        */
00279 void
00280 free_timing_driven_route_structs(float *pin_criticality,
00281                                  int *sink_order,
00282                                  t_rt_node ** rt_node_of_sink)
00283 {
00284     free(pin_criticality + 1);  /* Starts at index 1. */
00285     free(sink_order + 1);
00286     free(rt_node_of_sink + 1);
00287     free_route_tree_timing_structs();
00288 }
00289
00290
00291 /** Returns the largest number of pins on any non-global net.    */
00292 static int
00293 get_max_pins_per_net(void)
00294 {
00295     int inet, max_pins_per_net;
00296
00297     max_pins_per_net = 0;
00298     for(inet = 0; inet < num_nets; inet++)
00299         {
00300             if(clb_net[inet].is_global == FALSE)
00301                 {
00302                     max_pins_per_net =
00303                         max(max_pins_per_net, (clb_net[inet].num_sinks + 1));
00304                 }
00305         }
00306
00307     return (max_pins_per_net);
00308 }
00309
00310 /** Returns TRUE as long is found some way to hook up this net, even if that
00311  * way resulted in overuse of resources (congestion).  If there is no way
00312  * to route this net, even ignoring congestion, it returns FALSE.  In this
00313  * case the rr_graph is disconnected and you can give up.
00314  */
00315 boolean
00316 timing_driven_route_net(int inet,
00317                         float pres_fac,
00318                         float max_criticality,
00319                         float criticality_exp,
00320                         float astar_fac,
00321                         float bend_cost,
00322                         float *net_slack,
00323                         float *pin_criticality,
00324                         int *sink_order,
00325                         t_rt_node ** rt_node_of_sink,
00326                         float T_crit,
00327                         float *net_delay)
00328 {
00329
00330     int ipin, num_sinks, itarget, target_pin, target_node, inode;
00331     float target_criticality, old_tcost, new_tcost, largest_criticality,
00332         pin_crit;
00333     float old_back_cost, new_back_cost;
00334     t_rt_node *rt_root;
00335     struct s_heap *current;
00336     struct s_trace *new_route_start_tptr;
00337         int highfanout_rlim;
00338
00339 /* Rip-up any old routing. */
00340
00341     pathfinder_update_one_cost(trace_head[inet], -1, pres_fac);
00342     free_traceback(inet);
00343
00344     for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
00345         {                       /* For all sinks */
00346             pin_crit = max(max_criticality - net_slack[ipin] / T_crit, 0.);
00347             pin_crit = pow(pin_crit, criticality_exp);
00348             pin_crit = min(pin_crit, max_criticality);
00349             pin_criticality[ipin] = pin_crit;
00350         }
00351
00352     num_sinks = clb_net[inet].num_sinks;
00353     heapsort(sink_order, pin_criticality, num_sinks, 0);
00354
00355 /* Update base costs according to fanout and criticality rules */
00356
00357     largest_criticality = pin_criticality[sink_order[1]];
00358     update_rr_base_costs(inet, largest_criticality);
00359
00360     mark_ends(inet);            /* Only needed to check for multiply-connected SINKs */
00361
00362     rt_root = init_route_tree_to_source(inet);
00363
00364     for(itarget = 1; itarget <= num_sinks; itarget++)
00365         {
00366             target_pin = sink_order[itarget];
00367             target_node = net_rr_terminals[inet][target_pin];
00368
00369             target_criticality = pin_criticality[target_pin];
00370
00371                 highfanout_rlim = mark_node_expansion_by_bin(inet, target_node, rt_root);
00372
00373             add_route_tree_to_heap(rt_root, target_node, target_criticality,
00374                                    astar_fac);
00375
00376             current = get_heap_head();
00377
00378             if(current == NULL)
00379                 {               /* Infeasible routing.  No possible path for net. */
00380                     reset_path_costs();
00381                     free_route_tree(rt_root);
00382                     return (FALSE);
00383                 }
00384
00385             inode = current->index;
00386
00387             while(inode != target_node)
00388                 {
00389                     old_tcost = rr_node_route_inf[inode].path_cost;
00390                     new_tcost = current->cost;
00391
00392                     if(old_tcost > 0.99 * HUGE_FLOAT)   /* First time touched. */
00393                         old_back_cost = HUGE_FLOAT;
00394                     else
00395                         old_back_cost =
00396                             rr_node_route_inf[inode].backward_path_cost;
00397
00398                     new_back_cost = current->backward_path_cost;
00399
00400                     /* I only re-expand a node if both the "known" backward cost is lower  *
00401                      * in the new expansion (this is necessary to prevent loops from       *
00402                      * forming in the routing and causing havoc) *and* the expected total  *
00403                      * cost to the sink is lower than the old value.  Different R_upstream *
00404                      * values could make a path with lower back_path_cost less desirable   *
00405                      * than one with higher cost.  Test whether or not I should disallow   *
00406                      * re-expansion based on a higher total cost.                          */
00407
00408                     if(old_tcost > new_tcost && old_back_cost > new_back_cost)
00409                         {
00410                             rr_node_route_inf[inode].prev_node =
00411                                 current->u.prev_node;
00412                             rr_node_route_inf[inode].prev_edge =
00413                                 current->prev_edge;
00414                             rr_node_route_inf[inode].path_cost = new_tcost;
00415                             rr_node_route_inf[inode].backward_path_cost =
00416                                 new_back_cost;
00417
00418                             if(old_tcost > 0.99 * HUGE_FLOAT)   /* First time touched. */
00419                                 add_to_mod_list(&rr_node_route_inf[inode].
00420                                                 path_cost);
00421
00422                             timing_driven_expand_neighbours(current, inet,
00423                                                             bend_cost,
00424                                                             target_criticality,
00425                                                             target_node,
00426                                                             astar_fac,
00427                                                                 highfanout_rlim);
00428                         }
00429
00430                     free_heap_data(current);
00431                     current = get_heap_head();
00432
00433                     if(current == NULL)
00434                         {       /* Impossible routing.  No path for net. */
00435                             reset_path_costs();
00436                             free_route_tree(rt_root);
00437                             return (FALSE);
00438                         }
00439
00440                     inode = current->index;
00441                 }
00442
00443 /* NB:  In the code below I keep two records of the partial routing:  the   *
00444  * traceback and the route_tree.  The route_tree enables fast recomputation *
00445  * of the Elmore delay to each node in the partial routing.  The traceback  *
00446  * lets me reuse all the routines written for breadth-first routing, which  *
00447  * all take a traceback structure as input.  Before this routine exits the  *
00448  * route_tree structure is destroyed; only the traceback is needed at that  *
00449  * point.                                                                   */
00450
00451             rr_node_route_inf[inode].target_flag--;     /* Connected to this SINK. */
00452             new_route_start_tptr = update_traceback(current, inet);
00453             rt_node_of_sink[target_pin] = update_route_tree(current);
00454             free_heap_data(current);
00455             pathfinder_update_one_cost(new_route_start_tptr, 1, pres_fac);
00456
00457             empty_heap();
00458             reset_path_costs();
00459         }
00460
00461 /* For later timing analysis. */
00462
00463     update_net_delays_from_route_tree(net_delay, rt_node_of_sink, inet);
00464     free_route_tree(rt_root);
00465     return (TRUE);
00466 }
00467
00468 /** Puts the entire partial routing below and including rt_node onto the heap
00469  * (except for those parts marked as not to be expanded) by calling itself
00470  * recursively.
00471  */
00472 static void
00473 add_route_tree_to_heap(t_rt_node * rt_node,
00474                        int target_node,
00475                        float target_criticality,
00476                        float astar_fac)
00477 {
00478
00479     int inode;
00480     t_rt_node *child_node;
00481     t_linked_rt_edge *linked_rt_edge;
00482     float tot_cost, backward_path_cost, R_upstream;
00483
00484 /* Pre-order depth-first traversal */
00485
00486     if(rt_node->re_expand)
00487         {
00488             inode = rt_node->inode;
00489             backward_path_cost = target_criticality * rt_node->Tdel;
00490             R_upstream = rt_node->R_upstream;
00491             tot_cost =
00492                 backward_path_cost +
00493                 astar_fac * get_timing_driven_expected_cost(inode,
00494                                                             target_node,
00495                                                             target_criticality,
00496                                                             R_upstream);
00497             node_to_heap(inode, tot_cost, NO_PREVIOUS, NO_PREVIOUS,
00498                          backward_path_cost, R_upstream);
00499         }
00500
00501     linked_rt_edge = rt_node->u.child_list;
00502
00503     while(linked_rt_edge != NULL)
00504         {
00505             child_node = linked_rt_edge->child;
00506             add_route_tree_to_heap(child_node, target_node,
00507                                    target_criticality, astar_fac);
00508             linked_rt_edge = linked_rt_edge->next;
00509         }
00510 }
00511
00512 /** Puts all the rr_nodes adjacent to current on the heap.  rr_nodes outside
00513  * the expanded bounding box specified in route_bb are not added to the
00514  * heap.
00515  */
00516 static void
00517 timing_driven_expand_neighbours(struct s_heap *current,
00518                                 int inet,
00519                                 float bend_cost,
00520                                 float criticality_fac,
00521                                 int target_node,
00522                                 float astar_fac,
00523                                 int highfanout_rlim)
00524 {
00525     int iconn, to_node, num_edges, inode, iswitch, target_x, target_y;
00526     t_rr_type from_type, to_type;
00527     float new_tot_cost, old_back_pcost, new_back_pcost, R_upstream;
00528     float new_R_upstream, Tdel;
00529
00530     inode = current->index;
00531     old_back_pcost = current->backward_path_cost;
00532     R_upstream = current->R_upstream;
00533     num_edges = rr_node[inode].num_edges;
00534
00535     target_x = rr_node[target_node].xhigh;
00536     target_y = rr_node[target_node].yhigh;
00537
00538     for(iconn = 0; iconn < num_edges; iconn++)
00539         {
00540             to_node = rr_node[inode].edges[iconn];
00541
00542             if(rr_node[to_node].xhigh < route_bb[inet].xmin ||
00543                rr_node[to_node].xlow > route_bb[inet].xmax ||
00544                rr_node[to_node].yhigh < route_bb[inet].ymin ||
00545                rr_node[to_node].ylow > route_bb[inet].ymax)
00546                 continue;       /* Node is outside (expanded) bounding box. */
00547
00548                 if(clb_net[inet].num_sinks >= HIGH_FANOUT_NET_LIM) {
00549                         if(rr_node[to_node].xhigh < target_x - highfanout_rlim ||
00550                                 rr_node[to_node].xlow > target_x + highfanout_rlim ||
00551                                 rr_node[to_node].yhigh < target_y - highfanout_rlim ||
00552                                 rr_node[to_node].ylow > target_y + highfanout_rlim)
00553                         continue;       /* Node is outside high fanout bin. */
00554                 }
00555
00556
00557 /* Prune away IPINs that lead to blocks other than the target one.  Avoids  *
00558  * the issue of how to cost them properly so they don't get expanded before *
00559  * more promising routes, but makes route-throughs (via CLBs) impossible.   *
00560  * Change this if you want to investigate route-throughs.                   */
00561
00562             to_type = rr_node[to_node].type;
00563             if(to_type == IPIN && (rr_node[to_node].xhigh != target_x ||
00564                                    rr_node[to_node].yhigh != target_y))
00565                 continue;
00566
00567
00568 /* new_back_pcost stores the "known" part of the cost to this node -- the   *
00569  * congestion cost of all the routing resources back to the existing route  *
00570  * plus the known delay of the total path back to the source.  new_tot_cost *
00571  * is this "known" backward cost + an expected cost to get to the target.   */
00572
00573             new_back_pcost = old_back_pcost + (1. - criticality_fac) *
00574                 get_rr_cong_cost(to_node);
00575
00576             iswitch = rr_node[inode].switches[iconn];
00577             if(switch_inf[iswitch].buffered)
00578                 {
00579                     new_R_upstream = switch_inf[iswitch].R;
00580                 }
00581             else
00582                 {
00583                     new_R_upstream = R_upstream + switch_inf[iswitch].R;
00584                 }
00585
00586             Tdel =
00587                 rr_node[to_node].C * (new_R_upstream +
00588                                       0.5 * rr_node[to_node].R);
00589             Tdel += switch_inf[iswitch].Tdel;
00590             new_R_upstream += rr_node[to_node].R;
00591             new_back_pcost += criticality_fac * Tdel;
00592
00593             if(bend_cost != 0.)
00594                 {
00595                     from_type = rr_node[inode].type;
00596                     to_type = rr_node[to_node].type;
00597                     if((from_type == CHANX && to_type == CHANY) ||
00598                        (from_type == CHANY && to_type == CHANX))
00599                         new_back_pcost += bend_cost;
00600                 }
00601
00602             new_tot_cost = new_back_pcost + astar_fac *
00603                 get_timing_driven_expected_cost(to_node, target_node,
00604                                                 criticality_fac,
00605                                                 new_R_upstream);
00606
00607             node_to_heap(to_node, new_tot_cost, inode, iconn, new_back_pcost,
00608                          new_R_upstream);
00609
00610         }                       /* End for all neighbours */
00611 }
00612
00613 /** Determines the expected cost (due to both delay and resouce cost) to reach
00614  * the target node from inode.  It doesn't include the cost of inode --
00615  * that's already in the "known" path_cost.
00616  */
00617 static float
00618 get_timing_driven_expected_cost(int inode,
00619                                 int target_node,
00620                                 float criticality_fac,
00621                                 float R_upstream)
00622 {
00623     t_rr_type rr_type;
00624     int cost_index, ortho_cost_index, num_segs_same_dir, num_segs_ortho_dir;
00625     float expected_cost, cong_cost, Tdel;
00626
00627     rr_type = rr_node[inode].type;
00628
00629     if(rr_type == CHANX || rr_type == CHANY)
00630         {
00631             num_segs_same_dir =
00632                 get_expected_segs_to_target(inode, target_node,
00633                                             &num_segs_ortho_dir);
00634             cost_index = rr_node[inode].cost_index;
00635             ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index;
00636
00637             cong_cost =
00638                 num_segs_same_dir * rr_indexed_data[cost_index].base_cost +
00639                 num_segs_ortho_dir *
00640                 rr_indexed_data[ortho_cost_index].base_cost;
00641             cong_cost +=
00642                 rr_indexed_data[IPIN_COST_INDEX].base_cost +
00643                 rr_indexed_data[SINK_COST_INDEX].base_cost;
00644
00645             Tdel = num_segs_same_dir * rr_indexed_data[cost_index].T_linear +
00646                 num_segs_ortho_dir *
00647                 rr_indexed_data[ortho_cost_index].T_linear +
00648                 num_segs_same_dir * num_segs_same_dir *
00649                 rr_indexed_data[cost_index].T_quadratic +
00650                 num_segs_ortho_dir * num_segs_ortho_dir *
00651                 rr_indexed_data[ortho_cost_index].T_quadratic +
00652                 R_upstream * (num_segs_same_dir *
00653                               rr_indexed_data[cost_index].C_load +
00654                               num_segs_ortho_dir *
00655                               rr_indexed_data[ortho_cost_index].C_load);
00656
00657             Tdel += rr_indexed_data[IPIN_COST_INDEX].T_linear;
00658
00659             expected_cost =
00660                 criticality_fac * Tdel + (1. - criticality_fac) * cong_cost;
00661             return (expected_cost);
00662         }
00663
00664     else if(rr_type == IPIN)
00665         {                       /* Change if you're allowing route-throughs */
00666             return (rr_indexed_data[SINK_COST_INDEX].base_cost);
00667         }
00668
00669     else
00670         {                       /* Change this if you want to investigate route-throughs */
00671             return (0.);
00672         }
00673 }
00674
00675
00676 /** Macro used below to ensure that fractions are rounded up, but floating
00677  * point values very close to an integer are rounded to that integer.
00678  */
00679 #define ROUND_UP(x) (ceil (x - 0.001))
00680
00681 /** Returns the number of segments the same type as inode that will be needed
00682  * to reach target_node (not including inode) in each direction (the same
00683  * direction (horizontal or vertical) as inode and the orthogonal direction).
00684  */
00685 static int
00686 get_expected_segs_to_target(int inode,
00687                             int target_node,
00688                             int *num_segs_ortho_dir_ptr)
00689 {
00690     t_rr_type rr_type;
00691     int target_x, target_y, num_segs_same_dir, cost_index, ortho_cost_index;
00692     int no_need_to_pass_by_clb;
00693     float inv_length, ortho_inv_length, ylow, yhigh, xlow, xhigh;
00694
00695     target_x = rr_node[target_node].xlow;
00696     target_y = rr_node[target_node].ylow;
00697     cost_index = rr_node[inode].cost_index;
00698     inv_length = rr_indexed_data[cost_index].inv_length;
00699     ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index;
00700     ortho_inv_length = rr_indexed_data[ortho_cost_index].inv_length;
00701     rr_type = rr_node[inode].type;
00702
00703     if(rr_type == CHANX)
00704         {
00705             ylow = rr_node[inode].ylow;
00706             xhigh = rr_node[inode].xhigh;
00707             xlow = rr_node[inode].xlow;
00708
00709             /* Count vertical (orthogonal to inode) segs first. */
00710
00711             if(ylow > target_y)
00712                 {               /* Coming from a row above target? */
00713                     *num_segs_ortho_dir_ptr =
00714                         ROUND_UP((ylow - target_y + 1.) * ortho_inv_length);
00715                     no_need_to_pass_by_clb = 1;
00716                 }
00717             else if(ylow < target_y - 1)
00718                 {               /* Below the CLB bottom? */
00719                     *num_segs_ortho_dir_ptr = ROUND_UP((target_y - ylow) *
00720                                                        ortho_inv_length);
00721                     no_need_to_pass_by_clb = 1;
00722                 }
00723             else
00724                 {               /* In a row that passes by target CLB */
00725                     *num_segs_ortho_dir_ptr = 0;
00726                     no_need_to_pass_by_clb = 0;
00727                 }
00728
00729             /* Now count horizontal (same dir. as inode) segs. */
00730
00731             if(xlow > target_x + no_need_to_pass_by_clb)
00732                 {
00733                     num_segs_same_dir =
00734                         ROUND_UP((xlow - no_need_to_pass_by_clb -
00735                                   target_x) * inv_length);
00736                 }
00737             else if(xhigh < target_x - no_need_to_pass_by_clb)
00738                 {
00739                     num_segs_same_dir =
00740                         ROUND_UP((target_x - no_need_to_pass_by_clb -
00741                                   xhigh) * inv_length);
00742                 }
00743             else
00744                 {
00745                     num_segs_same_dir = 0;
00746                 }
00747         }
00748
00749     else
00750         {                       /* inode is a CHANY */
00751             ylow = rr_node[inode].ylow;
00752             yhigh = rr_node[inode].yhigh;
00753             xlow = rr_node[inode].xlow;
00754
00755             /* Count horizontal (orthogonal to inode) segs first. */
00756
00757             if(xlow > target_x)
00758                 {               /* Coming from a column right of target? */
00759                     *num_segs_ortho_dir_ptr =
00760                         ROUND_UP((xlow - target_x + 1.) * ortho_inv_length);
00761                     no_need_to_pass_by_clb = 1;
00762                 }
00763             else if(xlow < target_x - 1)
00764                 {               /* Left of and not adjacent to the CLB? */
00765                     *num_segs_ortho_dir_ptr = ROUND_UP((target_x - xlow) *
00766                                                        ortho_inv_length);
00767                     no_need_to_pass_by_clb = 1;
00768                 }
00769             else
00770                 {               /* In a column that passes by target CLB */
00771                     *num_segs_ortho_dir_ptr = 0;
00772                     no_need_to_pass_by_clb = 0;
00773                 }
00774
00775             /* Now count vertical (same dir. as inode) segs. */
00776
00777             if(ylow > target_y + no_need_to_pass_by_clb)
00778                 {
00779                     num_segs_same_dir =
00780                         ROUND_UP((ylow - no_need_to_pass_by_clb -
00781                                   target_y) * inv_length);
00782                 }
00783             else if(yhigh < target_y - no_need_to_pass_by_clb)
00784                 {
00785                     num_segs_same_dir =
00786                         ROUND_UP((target_y - no_need_to_pass_by_clb -
00787                                   yhigh) * inv_length);
00788                 }
00789             else
00790                 {
00791                     num_segs_same_dir = 0;
00792                 }
00793         }
00794
00795     return (num_segs_same_dir);
00796 }
00797
00798 /** Changes the base costs of different types of rr_nodes according to the
00799  * criticality, fanout, etc. of the current net being routed (inet).
00800  */
00801 static void
00802 update_rr_base_costs(int inet,
00803                      float largest_criticality)
00804 {
00805
00806     float fanout, factor;
00807     int index;
00808
00809     fanout = clb_net[inet].num_sinks;
00810
00811     /* Other reasonable values for factor include fanout and 1 */
00812     factor = sqrt(fanout);
00813
00814     for(index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++)
00815         {
00816             if(rr_indexed_data[index].T_quadratic > 0.)
00817                 {               /* pass transistor */
00818                     rr_indexed_data[index].base_cost =
00819                         rr_indexed_data[index].saved_base_cost * factor;
00820                 }
00821             else
00822                 {
00823                     rr_indexed_data[index].base_cost =
00824                         rr_indexed_data[index].saved_base_cost;
00825                 }
00826         }
00827 }
00828
00829 /** Nets that have high fanout can take a very long time to route.  Each sink should be routed contained within a bin instead of the entire bounding box to speed things up */
00830 static int mark_node_expansion_by_bin(int inet, int target_node, t_rt_node * rt_node) {
00831         int target_x, target_y;
00832         int rlim = 1;
00833         int inode;
00834         float area;
00835         boolean success;
00836         t_linked_rt_edge *linked_rt_edge;
00837         t_rt_node * child_node;
00838
00839         target_x = rr_node[target_node].xlow;
00840     target_y = rr_node[target_node].ylow;
00841
00842         if(clb_net[inet].num_sinks < HIGH_FANOUT_NET_LIM) {
00843                 /* This algorithm only applies to high fanout nets */
00844                 return 1;
00845         }
00846
00847         area = (route_bb[inet].xmax - route_bb[inet].xmin) * (route_bb[inet].ymax - route_bb[inet].ymin);
00848         if(area <= 0) {
00849                 area = 1;
00850         }
00851
00852         rlim = ceil(sqrt((float)area / (float)clb_net[inet].num_sinks));
00853         if(rt_node == NULL || rt_node->u.child_list == NULL) {
00854                 /* If unknown traceback, set radius of bin to be size of chip */
00855                 rlim = max(nx + 2, ny + 2);
00856                 return rlim;
00857         }
00858
00859         success = FALSE;
00860         /* determine quickly a feasible bin radius to route sink for high fanout nets
00861            this is necessary to prevent super long runtimes for high fanout nets; in best case, a reduction in complexity from O(N^2logN) to O(NlogN) (Swartz fast router)
00862         */
00863         linked_rt_edge = rt_node->u.child_list;
00864         while(success == FALSE && linked_rt_edge != NULL) {
00865                 while(linked_rt_edge != NULL && success == FALSE)
00866                 {
00867                         child_node = linked_rt_edge->child;
00868                         inode = child_node->inode;
00869                         if(!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {
00870                                 if(rr_node[inode].xlow <= target_x + rlim &&
00871                                         rr_node[inode].xhigh >= target_x - rlim &&
00872                                         rr_node[inode].ylow <= target_y + rlim &&
00873                                         rr_node[inode].yhigh >= target_y - rlim) {
00874                                         success = TRUE;
00875                                 }
00876                         }
00877                         linked_rt_edge = linked_rt_edge->next;
00878                 }
00879
00880                 if(success == FALSE) {
00881                         if(rlim > max(nx + 2, ny + 2)) {
00882                                 printf(ERRTAG "VPR internal error, net %s has paths that are not found in traceback\n", clb_net[inet].name);
00883                                 exit(1);
00884                         }
00885                         /* if sink not in bin, increase bin size until fit */
00886                         rlim *= 2;
00887                 } else {
00888                         /* Sometimes might just catch a wire in the end segment, need to give it some channel space to explore */
00889                         rlim += 4;
00890                 }
00891                 linked_rt_edge = rt_node->u.child_list;
00892         }
00893
00894         /* redetermine expansion based on rlim */
00895         linked_rt_edge = rt_node->u.child_list;
00896         while(linked_rt_edge != NULL)
00897         {
00898                 child_node = linked_rt_edge->child;
00899                 inode = child_node->inode;
00900                 if(!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {
00901                         if(rr_node[inode].xlow <= target_x + rlim &&
00902                                 rr_node[inode].xhigh >= target_x - rlim &&
00903                                 rr_node[inode].ylow <= target_y + rlim &&
00904                                 rr_node[inode].yhigh >= target_y - rlim) {
00905                                 child_node->re_expand = TRUE;
00906                         } else {
00907                                 child_node->re_expand = FALSE;
00908                         }
00909                 }
00910                 linked_rt_edge = linked_rt_edge->next;
00911         }
00912         return rlim;
00913 }
00914
00915
00916 #define ERROR_TOL 0.0001
00917
00918 /** Checks that the net delays computed incrementally during timing driven
00919  * routing match those computed from scratch by the net_delay.c module.
00920  */
00921 static void
00922 timing_driven_check_net_delays(float **net_delay)
00923 {
00924     int inet, ipin;
00925     float **net_delay_check;
00926     struct s_linked_vptr *ch_list_head_net_delay_check;
00927
00928     net_delay_check = alloc_net_delay(&ch_list_head_net_delay_check, clb_net, num_nets);
00929     load_net_delay_from_routing(net_delay_check, clb_net, num_nets);
00930
00931     for(inet = 0; inet < num_nets; inet++)
00932         {
00933             for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
00934                 {
00935                     if(net_delay_check[inet][ipin] == 0.)
00936                         {       /* Should be only GLOBAL nets */
00937                             if(net_delay[inet][ipin] != 0.)
00938                                 {
00939                                     printf
00940                                         ("Error in timing_driven_check_net_delays: net %d pin %d."
00941                                          "\tIncremental calc. net_delay is %g, but from scratch "
00942                                          "net delay is %g.\n", inet, ipin,
00943                                          net_delay[inet][ipin],
00944                                          net_delay_check[inet][ipin]);
00945                                     exit(1);
00946                                 }
00947                         }
00948                     else
00949                         {
00950                             if(fabs
00951                                (1. -
00952                                 net_delay[inet][ipin] /
00953                                 net_delay_check[inet][ipin]) > ERROR_TOL)
00954                                 {
00955                                     printf
00956                                         ("Error in timing_driven_check_net_delays: net %d pin %d."
00957                                          "\tIncremental calc. net_delay is %g, but from scratch "
00958                                          "net delay is %g.\n", inet, ipin,
00959                                          net_delay[inet][ipin],
00960                                          net_delay_check[inet][ipin]);
00961                                     exit(1);
00962                                 }
00963                         }
00964                 }
00965         }
00966
00967     free_net_delay(net_delay_check, &ch_list_head_net_delay_check);
00968     printf("Completed net delay value cross check successfully.\n");
00969 }