VPR-6.0
|
00001 #include <stdio.h> 00002 #include <math.h> 00003 #include "util.h" 00004 #include "vpr_types.h" 00005 #include "globals.h" 00006 #include "mst.h" 00007 #include "route_export.h" 00008 #include "route_common.h" 00009 #include "route_tree_timing.h" 00010 #include "route_timing.h" 00011 #include "heapsort.h" 00012 #include "path_delay.h" 00013 #include "net_delay.h" 00014 #include "stats.h" 00015 00016 /******************** Subroutines local to route_timing.c ********************/ 00017 00018 static int get_max_pins_per_net(void); 00019 00020 static void add_route_tree_to_heap(t_rt_node * rt_node, 00021 int target_node, 00022 float target_criticality, 00023 float astar_fac); 00024 00025 static void timing_driven_expand_neighbours(struct s_heap *current, 00026 int inet, 00027 float bend_cost, 00028 float criticality_fac, 00029 int target_node, 00030 float astar_fac, 00031 int highfanout_rlim); 00032 00033 static float get_timing_driven_expected_cost(int inode, 00034 int target_node, 00035 float criticality_fac, 00036 float R_upstream); 00037 00038 static int get_expected_segs_to_target(int inode, 00039 int target_node, 00040 int *num_segs_ortho_dir_ptr); 00041 00042 static void update_rr_base_costs(int inet, 00043 float largest_criticality); 00044 00045 static void timing_driven_check_net_delays(float **net_delay); 00046 00047 static int mark_node_expansion_by_bin(int inet, int target_node, t_rt_node * rt_node); 00048 00049 00050 /************************ Subroutine definitions *****************************/ 00051 00052 /* Timing-driven routing algorithm. The timing graph (includes net_slack) 00053 * must have already been allocated, and net_delay must have been allocated. 00054 * @returns TRUE if the routing succeeds, FALSE otherwise. 00055 */ 00056 boolean 00057 try_timing_driven_route(struct s_router_opts router_opts, 00058 float **net_slack, 00059 float **net_delay, 00060 t_ivec ** clb_opins_used_locally) 00061 { 00062 00063 int itry, inet, ipin, i; 00064 boolean success, is_routable, rip_up_local_opins; 00065 float *pin_criticality; /* [1..max_pins_per_net-1]. */ 00066 int *sink_order; /* [1..max_pins_per_net-1]. */ 00067 t_rt_node **rt_node_of_sink; /* [1..max_pins_per_net-1]. */ 00068 float T_crit, pres_fac; 00069 00070 float *sinks; 00071 int *net_index; 00072 00073 int bends; 00074 int wirelength, total_wirelength, available_wirelength; 00075 int segments; 00076 00077 sinks = my_malloc(sizeof(float) * num_nets); 00078 net_index = my_malloc(sizeof(int) * num_nets); 00079 for(i = 0; i < num_nets; i++) { 00080 sinks[i] = clb_net[i].num_sinks; 00081 net_index[i] = i; 00082 } 00083 heapsort(net_index, sinks, num_nets, 1); 00084 00085 alloc_timing_driven_route_structs(&pin_criticality, &sink_order, 00086 &rt_node_of_sink); 00087 00088 /* First do one routing iteration ignoring congestion and marking all sinks * 00089 * on each net as critical to get reasonable net delay estimates. */ 00090 00091 for(inet = 0; inet < num_nets; inet++) 00092 { 00093 if(clb_net[inet].is_global == FALSE) 00094 { 00095 for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) 00096 net_slack[inet][ipin] = 0.; 00097 } 00098 else 00099 { /* Set delay of global signals to zero. */ 00100 for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) 00101 net_delay[inet][ipin] = 0.; 00102 } 00103 } 00104 00105 T_crit = 1.; 00106 pres_fac = router_opts.first_iter_pres_fac; /* Typically 0 -> ignore cong. */ 00107 00108 for(itry = 1; itry <= router_opts.max_router_iterations; itry++) 00109 { 00110 00111 for(i = 0; i < num_nets; i++) 00112 { 00113 inet = net_index[i]; 00114 if(clb_net[inet].is_global == FALSE) 00115 { /* Skip global nets. */ 00116 00117 is_routable = 00118 timing_driven_route_net(inet, pres_fac, 00119 router_opts. 00120 max_criticality, 00121 router_opts. 00122 criticality_exp, 00123 router_opts.astar_fac, 00124 router_opts.bend_cost, 00125 net_slack[inet], 00126 pin_criticality, 00127 sink_order, 00128 rt_node_of_sink, 00129 T_crit, 00130 net_delay[inet]); 00131 00132 /* Impossible to route? (disconnected rr_graph) */ 00133 00134 if(!is_routable) 00135 { 00136 printf("Routing failed.\n"); 00137 free_timing_driven_route_structs 00138 (pin_criticality, sink_order, 00139 rt_node_of_sink); 00140 free(net_index); 00141 free(sinks); 00142 return (FALSE); 00143 } 00144 } 00145 } 00146 00147 00148 if(itry == 1) { 00149 /* Early exit code for cases where it is obvious that a successful route will not be found 00150 Heuristic: If total wirelength used in first routing iteration is X% of total available wirelength, exit 00151 */ 00152 total_wirelength = 0; 00153 available_wirelength = 0; 00154 00155 for(i = 0; i < num_rr_nodes; i++) { 00156 if(rr_node[i].type == CHANX || rr_node[i].type == CHANY) 00157 { 00158 available_wirelength += 1 + rr_node[i].xhigh - rr_node[i].xlow + 00159 rr_node[i].yhigh - rr_node[i].ylow; 00160 } 00161 } 00162 00163 for(inet = 0; inet < num_nets; inet++) 00164 { 00165 if(clb_net[inet].is_global == FALSE && clb_net[inet].num_sinks != 0) 00166 { /* Globals don't count. */ 00167 get_num_bends_and_length(inet, &bends, &wirelength, 00168 &segments); 00169 00170 total_wirelength += wirelength; 00171 } 00172 } 00173 printf("wirelength after first iteration %d, total available wirelength %d, ratio %g\n", total_wirelength, available_wirelength, (float)(total_wirelength)/(float)(available_wirelength)); 00174 if((float)(total_wirelength)/(float)(available_wirelength) > FIRST_ITER_WIRELENTH_LIMIT) { 00175 printf("Wirelength usage ratio exceeds limit of %g, fail routing\n", FIRST_ITER_WIRELENTH_LIMIT); 00176 free_timing_driven_route_structs 00177 (pin_criticality, sink_order, 00178 rt_node_of_sink); 00179 free(net_index); 00180 free(sinks); 00181 return FALSE; 00182 } 00183 } 00184 00185 00186 /* Make sure any CLB OPINs used up by subblocks being hooked directly * 00187 * to them are reserved for that purpose. */ 00188 00189 if(itry == 1) 00190 rip_up_local_opins = FALSE; 00191 else 00192 rip_up_local_opins = TRUE; 00193 00194 reserve_locally_used_opins(pres_fac, rip_up_local_opins, 00195 clb_opins_used_locally); 00196 00197 /* Pathfinder guys quit after finding a feasible route. I may want to keep * 00198 * going longer, trying to improve timing. Think about this some. */ 00199 00200 success = feasible_routing(); 00201 if(success) 00202 { 00203 printf 00204 ("Successfully routed after %d routing iterations.\n", 00205 itry); 00206 free_timing_driven_route_structs(pin_criticality, 00207 sink_order, 00208 rt_node_of_sink); 00209 #ifdef DEBUG 00210 timing_driven_check_net_delays(net_delay); 00211 #endif 00212 free(net_index); 00213 free(sinks); 00214 return (TRUE); 00215 } 00216 00217 if(itry == 1) 00218 { 00219 pres_fac = router_opts.initial_pres_fac; 00220 pathfinder_update_cost(pres_fac, 0.); /* Acc_fac=0 for first iter. */ 00221 } 00222 else 00223 { 00224 pres_fac *= router_opts.pres_fac_mult; 00225 00226 /* Avoid overflow for high iteration counts, even if acc_cost is big */ 00227 pres_fac = min (pres_fac, HUGE_FLOAT / 1e5); 00228 00229 pathfinder_update_cost(pres_fac, router_opts.acc_fac); 00230 } 00231 00232 /* Update slack values by doing another timing analysis. * 00233 * Timing_driven_route_net updated the net delay values. */ 00234 00235 load_timing_graph_net_delays(net_delay); 00236 T_crit = load_net_slack(net_slack, 0); 00237 printf("T_crit: %g.\n", T_crit); 00238 fflush(stdout); 00239 } 00240 00241 printf("Routing failed.\n"); 00242 free_timing_driven_route_structs(pin_criticality, sink_order, 00243 rt_node_of_sink); 00244 free(net_index); 00245 free(sinks); 00246 return (FALSE); 00247 } 00248 00249 00250 /** Allocates all the structures needed only by the timing-driven router. */ 00251 void 00252 alloc_timing_driven_route_structs(float **pin_criticality_ptr, 00253 int **sink_order_ptr, 00254 t_rt_node *** rt_node_of_sink_ptr) 00255 { 00256 int max_pins_per_net; 00257 float *pin_criticality; 00258 int *sink_order; 00259 t_rt_node **rt_node_of_sink; 00260 00261 max_pins_per_net = get_max_pins_per_net(); 00262 00263 pin_criticality = 00264 (float *)my_malloc((max_pins_per_net - 1) * sizeof(float)); 00265 *pin_criticality_ptr = pin_criticality - 1; /* First sink is pin #1. */ 00266 00267 sink_order = (int *)my_malloc((max_pins_per_net - 1) * sizeof(int)); 00268 *sink_order_ptr = sink_order - 1; 00269 00270 rt_node_of_sink = (t_rt_node **) my_malloc((max_pins_per_net - 1) * 00271 sizeof(t_rt_node *)); 00272 *rt_node_of_sink_ptr = rt_node_of_sink - 1; 00273 00274 alloc_route_tree_timing_structs(); 00275 } 00276 00277 00278 /** Frees all the stuctures needed only by the timing-driven router. */ 00279 void 00280 free_timing_driven_route_structs(float *pin_criticality, 00281 int *sink_order, 00282 t_rt_node ** rt_node_of_sink) 00283 { 00284 free(pin_criticality + 1); /* Starts at index 1. */ 00285 free(sink_order + 1); 00286 free(rt_node_of_sink + 1); 00287 free_route_tree_timing_structs(); 00288 } 00289 00290 00291 /** Returns the largest number of pins on any non-global net. */ 00292 static int 00293 get_max_pins_per_net(void) 00294 { 00295 int inet, max_pins_per_net; 00296 00297 max_pins_per_net = 0; 00298 for(inet = 0; inet < num_nets; inet++) 00299 { 00300 if(clb_net[inet].is_global == FALSE) 00301 { 00302 max_pins_per_net = 00303 max(max_pins_per_net, (clb_net[inet].num_sinks + 1)); 00304 } 00305 } 00306 00307 return (max_pins_per_net); 00308 } 00309 00310 /** Returns TRUE as long is found some way to hook up this net, even if that 00311 * way resulted in overuse of resources (congestion). If there is no way 00312 * to route this net, even ignoring congestion, it returns FALSE. In this 00313 * case the rr_graph is disconnected and you can give up. 00314 */ 00315 boolean 00316 timing_driven_route_net(int inet, 00317 float pres_fac, 00318 float max_criticality, 00319 float criticality_exp, 00320 float astar_fac, 00321 float bend_cost, 00322 float *net_slack, 00323 float *pin_criticality, 00324 int *sink_order, 00325 t_rt_node ** rt_node_of_sink, 00326 float T_crit, 00327 float *net_delay) 00328 { 00329 00330 int ipin, num_sinks, itarget, target_pin, target_node, inode; 00331 float target_criticality, old_tcost, new_tcost, largest_criticality, 00332 pin_crit; 00333 float old_back_cost, new_back_cost; 00334 t_rt_node *rt_root; 00335 struct s_heap *current; 00336 struct s_trace *new_route_start_tptr; 00337 int highfanout_rlim; 00338 00339 /* Rip-up any old routing. */ 00340 00341 pathfinder_update_one_cost(trace_head[inet], -1, pres_fac); 00342 free_traceback(inet); 00343 00344 for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) 00345 { /* For all sinks */ 00346 pin_crit = max(max_criticality - net_slack[ipin] / T_crit, 0.); 00347 pin_crit = pow(pin_crit, criticality_exp); 00348 pin_crit = min(pin_crit, max_criticality); 00349 pin_criticality[ipin] = pin_crit; 00350 } 00351 00352 num_sinks = clb_net[inet].num_sinks; 00353 heapsort(sink_order, pin_criticality, num_sinks, 0); 00354 00355 /* Update base costs according to fanout and criticality rules */ 00356 00357 largest_criticality = pin_criticality[sink_order[1]]; 00358 update_rr_base_costs(inet, largest_criticality); 00359 00360 mark_ends(inet); /* Only needed to check for multiply-connected SINKs */ 00361 00362 rt_root = init_route_tree_to_source(inet); 00363 00364 for(itarget = 1; itarget <= num_sinks; itarget++) 00365 { 00366 target_pin = sink_order[itarget]; 00367 target_node = net_rr_terminals[inet][target_pin]; 00368 00369 target_criticality = pin_criticality[target_pin]; 00370 00371 highfanout_rlim = mark_node_expansion_by_bin(inet, target_node, rt_root); 00372 00373 add_route_tree_to_heap(rt_root, target_node, target_criticality, 00374 astar_fac); 00375 00376 current = get_heap_head(); 00377 00378 if(current == NULL) 00379 { /* Infeasible routing. No possible path for net. */ 00380 reset_path_costs(); 00381 free_route_tree(rt_root); 00382 return (FALSE); 00383 } 00384 00385 inode = current->index; 00386 00387 while(inode != target_node) 00388 { 00389 old_tcost = rr_node_route_inf[inode].path_cost; 00390 new_tcost = current->cost; 00391 00392 if(old_tcost > 0.99 * HUGE_FLOAT) /* First time touched. */ 00393 old_back_cost = HUGE_FLOAT; 00394 else 00395 old_back_cost = 00396 rr_node_route_inf[inode].backward_path_cost; 00397 00398 new_back_cost = current->backward_path_cost; 00399 00400 /* I only re-expand a node if both the "known" backward cost is lower * 00401 * in the new expansion (this is necessary to prevent loops from * 00402 * forming in the routing and causing havoc) *and* the expected total * 00403 * cost to the sink is lower than the old value. Different R_upstream * 00404 * values could make a path with lower back_path_cost less desirable * 00405 * than one with higher cost. Test whether or not I should disallow * 00406 * re-expansion based on a higher total cost. */ 00407 00408 if(old_tcost > new_tcost && old_back_cost > new_back_cost) 00409 { 00410 rr_node_route_inf[inode].prev_node = 00411 current->u.prev_node; 00412 rr_node_route_inf[inode].prev_edge = 00413 current->prev_edge; 00414 rr_node_route_inf[inode].path_cost = new_tcost; 00415 rr_node_route_inf[inode].backward_path_cost = 00416 new_back_cost; 00417 00418 if(old_tcost > 0.99 * HUGE_FLOAT) /* First time touched. */ 00419 add_to_mod_list(&rr_node_route_inf[inode]. 00420 path_cost); 00421 00422 timing_driven_expand_neighbours(current, inet, 00423 bend_cost, 00424 target_criticality, 00425 target_node, 00426 astar_fac, 00427 highfanout_rlim); 00428 } 00429 00430 free_heap_data(current); 00431 current = get_heap_head(); 00432 00433 if(current == NULL) 00434 { /* Impossible routing. No path for net. */ 00435 reset_path_costs(); 00436 free_route_tree(rt_root); 00437 return (FALSE); 00438 } 00439 00440 inode = current->index; 00441 } 00442 00443 /* NB: In the code below I keep two records of the partial routing: the * 00444 * traceback and the route_tree. The route_tree enables fast recomputation * 00445 * of the Elmore delay to each node in the partial routing. The traceback * 00446 * lets me reuse all the routines written for breadth-first routing, which * 00447 * all take a traceback structure as input. Before this routine exits the * 00448 * route_tree structure is destroyed; only the traceback is needed at that * 00449 * point. */ 00450 00451 rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. */ 00452 new_route_start_tptr = update_traceback(current, inet); 00453 rt_node_of_sink[target_pin] = update_route_tree(current); 00454 free_heap_data(current); 00455 pathfinder_update_one_cost(new_route_start_tptr, 1, pres_fac); 00456 00457 empty_heap(); 00458 reset_path_costs(); 00459 } 00460 00461 /* For later timing analysis. */ 00462 00463 update_net_delays_from_route_tree(net_delay, rt_node_of_sink, inet); 00464 free_route_tree(rt_root); 00465 return (TRUE); 00466 } 00467 00468 /** Puts the entire partial routing below and including rt_node onto the heap 00469 * (except for those parts marked as not to be expanded) by calling itself 00470 * recursively. 00471 */ 00472 static void 00473 add_route_tree_to_heap(t_rt_node * rt_node, 00474 int target_node, 00475 float target_criticality, 00476 float astar_fac) 00477 { 00478 00479 int inode; 00480 t_rt_node *child_node; 00481 t_linked_rt_edge *linked_rt_edge; 00482 float tot_cost, backward_path_cost, R_upstream; 00483 00484 /* Pre-order depth-first traversal */ 00485 00486 if(rt_node->re_expand) 00487 { 00488 inode = rt_node->inode; 00489 backward_path_cost = target_criticality * rt_node->Tdel; 00490 R_upstream = rt_node->R_upstream; 00491 tot_cost = 00492 backward_path_cost + 00493 astar_fac * get_timing_driven_expected_cost(inode, 00494 target_node, 00495 target_criticality, 00496 R_upstream); 00497 node_to_heap(inode, tot_cost, NO_PREVIOUS, NO_PREVIOUS, 00498 backward_path_cost, R_upstream); 00499 } 00500 00501 linked_rt_edge = rt_node->u.child_list; 00502 00503 while(linked_rt_edge != NULL) 00504 { 00505 child_node = linked_rt_edge->child; 00506 add_route_tree_to_heap(child_node, target_node, 00507 target_criticality, astar_fac); 00508 linked_rt_edge = linked_rt_edge->next; 00509 } 00510 } 00511 00512 /** Puts all the rr_nodes adjacent to current on the heap. rr_nodes outside 00513 * the expanded bounding box specified in route_bb are not added to the 00514 * heap. 00515 */ 00516 static void 00517 timing_driven_expand_neighbours(struct s_heap *current, 00518 int inet, 00519 float bend_cost, 00520 float criticality_fac, 00521 int target_node, 00522 float astar_fac, 00523 int highfanout_rlim) 00524 { 00525 int iconn, to_node, num_edges, inode, iswitch, target_x, target_y; 00526 t_rr_type from_type, to_type; 00527 float new_tot_cost, old_back_pcost, new_back_pcost, R_upstream; 00528 float new_R_upstream, Tdel; 00529 00530 inode = current->index; 00531 old_back_pcost = current->backward_path_cost; 00532 R_upstream = current->R_upstream; 00533 num_edges = rr_node[inode].num_edges; 00534 00535 target_x = rr_node[target_node].xhigh; 00536 target_y = rr_node[target_node].yhigh; 00537 00538 for(iconn = 0; iconn < num_edges; iconn++) 00539 { 00540 to_node = rr_node[inode].edges[iconn]; 00541 00542 if(rr_node[to_node].xhigh < route_bb[inet].xmin || 00543 rr_node[to_node].xlow > route_bb[inet].xmax || 00544 rr_node[to_node].yhigh < route_bb[inet].ymin || 00545 rr_node[to_node].ylow > route_bb[inet].ymax) 00546 continue; /* Node is outside (expanded) bounding box. */ 00547 00548 if(clb_net[inet].num_sinks >= HIGH_FANOUT_NET_LIM) { 00549 if(rr_node[to_node].xhigh < target_x - highfanout_rlim || 00550 rr_node[to_node].xlow > target_x + highfanout_rlim || 00551 rr_node[to_node].yhigh < target_y - highfanout_rlim || 00552 rr_node[to_node].ylow > target_y + highfanout_rlim) 00553 continue; /* Node is outside high fanout bin. */ 00554 } 00555 00556 00557 /* Prune away IPINs that lead to blocks other than the target one. Avoids * 00558 * the issue of how to cost them properly so they don't get expanded before * 00559 * more promising routes, but makes route-throughs (via CLBs) impossible. * 00560 * Change this if you want to investigate route-throughs. */ 00561 00562 to_type = rr_node[to_node].type; 00563 if(to_type == IPIN && (rr_node[to_node].xhigh != target_x || 00564 rr_node[to_node].yhigh != target_y)) 00565 continue; 00566 00567 00568 /* new_back_pcost stores the "known" part of the cost to this node -- the * 00569 * congestion cost of all the routing resources back to the existing route * 00570 * plus the known delay of the total path back to the source. new_tot_cost * 00571 * is this "known" backward cost + an expected cost to get to the target. */ 00572 00573 new_back_pcost = old_back_pcost + (1. - criticality_fac) * 00574 get_rr_cong_cost(to_node); 00575 00576 iswitch = rr_node[inode].switches[iconn]; 00577 if(switch_inf[iswitch].buffered) 00578 { 00579 new_R_upstream = switch_inf[iswitch].R; 00580 } 00581 else 00582 { 00583 new_R_upstream = R_upstream + switch_inf[iswitch].R; 00584 } 00585 00586 Tdel = 00587 rr_node[to_node].C * (new_R_upstream + 00588 0.5 * rr_node[to_node].R); 00589 Tdel += switch_inf[iswitch].Tdel; 00590 new_R_upstream += rr_node[to_node].R; 00591 new_back_pcost += criticality_fac * Tdel; 00592 00593 if(bend_cost != 0.) 00594 { 00595 from_type = rr_node[inode].type; 00596 to_type = rr_node[to_node].type; 00597 if((from_type == CHANX && to_type == CHANY) || 00598 (from_type == CHANY && to_type == CHANX)) 00599 new_back_pcost += bend_cost; 00600 } 00601 00602 new_tot_cost = new_back_pcost + astar_fac * 00603 get_timing_driven_expected_cost(to_node, target_node, 00604 criticality_fac, 00605 new_R_upstream); 00606 00607 node_to_heap(to_node, new_tot_cost, inode, iconn, new_back_pcost, 00608 new_R_upstream); 00609 00610 } /* End for all neighbours */ 00611 } 00612 00613 /** Determines the expected cost (due to both delay and resouce cost) to reach 00614 * the target node from inode. It doesn't include the cost of inode -- 00615 * that's already in the "known" path_cost. 00616 */ 00617 static float 00618 get_timing_driven_expected_cost(int inode, 00619 int target_node, 00620 float criticality_fac, 00621 float R_upstream) 00622 { 00623 t_rr_type rr_type; 00624 int cost_index, ortho_cost_index, num_segs_same_dir, num_segs_ortho_dir; 00625 float expected_cost, cong_cost, Tdel; 00626 00627 rr_type = rr_node[inode].type; 00628 00629 if(rr_type == CHANX || rr_type == CHANY) 00630 { 00631 num_segs_same_dir = 00632 get_expected_segs_to_target(inode, target_node, 00633 &num_segs_ortho_dir); 00634 cost_index = rr_node[inode].cost_index; 00635 ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index; 00636 00637 cong_cost = 00638 num_segs_same_dir * rr_indexed_data[cost_index].base_cost + 00639 num_segs_ortho_dir * 00640 rr_indexed_data[ortho_cost_index].base_cost; 00641 cong_cost += 00642 rr_indexed_data[IPIN_COST_INDEX].base_cost + 00643 rr_indexed_data[SINK_COST_INDEX].base_cost; 00644 00645 Tdel = num_segs_same_dir * rr_indexed_data[cost_index].T_linear + 00646 num_segs_ortho_dir * 00647 rr_indexed_data[ortho_cost_index].T_linear + 00648 num_segs_same_dir * num_segs_same_dir * 00649 rr_indexed_data[cost_index].T_quadratic + 00650 num_segs_ortho_dir * num_segs_ortho_dir * 00651 rr_indexed_data[ortho_cost_index].T_quadratic + 00652 R_upstream * (num_segs_same_dir * 00653 rr_indexed_data[cost_index].C_load + 00654 num_segs_ortho_dir * 00655 rr_indexed_data[ortho_cost_index].C_load); 00656 00657 Tdel += rr_indexed_data[IPIN_COST_INDEX].T_linear; 00658 00659 expected_cost = 00660 criticality_fac * Tdel + (1. - criticality_fac) * cong_cost; 00661 return (expected_cost); 00662 } 00663 00664 else if(rr_type == IPIN) 00665 { /* Change if you're allowing route-throughs */ 00666 return (rr_indexed_data[SINK_COST_INDEX].base_cost); 00667 } 00668 00669 else 00670 { /* Change this if you want to investigate route-throughs */ 00671 return (0.); 00672 } 00673 } 00674 00675 00676 /** Macro used below to ensure that fractions are rounded up, but floating 00677 * point values very close to an integer are rounded to that integer. 00678 */ 00679 #define ROUND_UP(x) (ceil (x - 0.001)) 00680 00681 /** Returns the number of segments the same type as inode that will be needed 00682 * to reach target_node (not including inode) in each direction (the same 00683 * direction (horizontal or vertical) as inode and the orthogonal direction). 00684 */ 00685 static int 00686 get_expected_segs_to_target(int inode, 00687 int target_node, 00688 int *num_segs_ortho_dir_ptr) 00689 { 00690 t_rr_type rr_type; 00691 int target_x, target_y, num_segs_same_dir, cost_index, ortho_cost_index; 00692 int no_need_to_pass_by_clb; 00693 float inv_length, ortho_inv_length, ylow, yhigh, xlow, xhigh; 00694 00695 target_x = rr_node[target_node].xlow; 00696 target_y = rr_node[target_node].ylow; 00697 cost_index = rr_node[inode].cost_index; 00698 inv_length = rr_indexed_data[cost_index].inv_length; 00699 ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index; 00700 ortho_inv_length = rr_indexed_data[ortho_cost_index].inv_length; 00701 rr_type = rr_node[inode].type; 00702 00703 if(rr_type == CHANX) 00704 { 00705 ylow = rr_node[inode].ylow; 00706 xhigh = rr_node[inode].xhigh; 00707 xlow = rr_node[inode].xlow; 00708 00709 /* Count vertical (orthogonal to inode) segs first. */ 00710 00711 if(ylow > target_y) 00712 { /* Coming from a row above target? */ 00713 *num_segs_ortho_dir_ptr = 00714 ROUND_UP((ylow - target_y + 1.) * ortho_inv_length); 00715 no_need_to_pass_by_clb = 1; 00716 } 00717 else if(ylow < target_y - 1) 00718 { /* Below the CLB bottom? */ 00719 *num_segs_ortho_dir_ptr = ROUND_UP((target_y - ylow) * 00720 ortho_inv_length); 00721 no_need_to_pass_by_clb = 1; 00722 } 00723 else 00724 { /* In a row that passes by target CLB */ 00725 *num_segs_ortho_dir_ptr = 0; 00726 no_need_to_pass_by_clb = 0; 00727 } 00728 00729 /* Now count horizontal (same dir. as inode) segs. */ 00730 00731 if(xlow > target_x + no_need_to_pass_by_clb) 00732 { 00733 num_segs_same_dir = 00734 ROUND_UP((xlow - no_need_to_pass_by_clb - 00735 target_x) * inv_length); 00736 } 00737 else if(xhigh < target_x - no_need_to_pass_by_clb) 00738 { 00739 num_segs_same_dir = 00740 ROUND_UP((target_x - no_need_to_pass_by_clb - 00741 xhigh) * inv_length); 00742 } 00743 else 00744 { 00745 num_segs_same_dir = 0; 00746 } 00747 } 00748 00749 else 00750 { /* inode is a CHANY */ 00751 ylow = rr_node[inode].ylow; 00752 yhigh = rr_node[inode].yhigh; 00753 xlow = rr_node[inode].xlow; 00754 00755 /* Count horizontal (orthogonal to inode) segs first. */ 00756 00757 if(xlow > target_x) 00758 { /* Coming from a column right of target? */ 00759 *num_segs_ortho_dir_ptr = 00760 ROUND_UP((xlow - target_x + 1.) * ortho_inv_length); 00761 no_need_to_pass_by_clb = 1; 00762 } 00763 else if(xlow < target_x - 1) 00764 { /* Left of and not adjacent to the CLB? */ 00765 *num_segs_ortho_dir_ptr = ROUND_UP((target_x - xlow) * 00766 ortho_inv_length); 00767 no_need_to_pass_by_clb = 1; 00768 } 00769 else 00770 { /* In a column that passes by target CLB */ 00771 *num_segs_ortho_dir_ptr = 0; 00772 no_need_to_pass_by_clb = 0; 00773 } 00774 00775 /* Now count vertical (same dir. as inode) segs. */ 00776 00777 if(ylow > target_y + no_need_to_pass_by_clb) 00778 { 00779 num_segs_same_dir = 00780 ROUND_UP((ylow - no_need_to_pass_by_clb - 00781 target_y) * inv_length); 00782 } 00783 else if(yhigh < target_y - no_need_to_pass_by_clb) 00784 { 00785 num_segs_same_dir = 00786 ROUND_UP((target_y - no_need_to_pass_by_clb - 00787 yhigh) * inv_length); 00788 } 00789 else 00790 { 00791 num_segs_same_dir = 0; 00792 } 00793 } 00794 00795 return (num_segs_same_dir); 00796 } 00797 00798 /** Changes the base costs of different types of rr_nodes according to the 00799 * criticality, fanout, etc. of the current net being routed (inet). 00800 */ 00801 static void 00802 update_rr_base_costs(int inet, 00803 float largest_criticality) 00804 { 00805 00806 float fanout, factor; 00807 int index; 00808 00809 fanout = clb_net[inet].num_sinks; 00810 00811 /* Other reasonable values for factor include fanout and 1 */ 00812 factor = sqrt(fanout); 00813 00814 for(index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++) 00815 { 00816 if(rr_indexed_data[index].T_quadratic > 0.) 00817 { /* pass transistor */ 00818 rr_indexed_data[index].base_cost = 00819 rr_indexed_data[index].saved_base_cost * factor; 00820 } 00821 else 00822 { 00823 rr_indexed_data[index].base_cost = 00824 rr_indexed_data[index].saved_base_cost; 00825 } 00826 } 00827 } 00828 00829 /** Nets that have high fanout can take a very long time to route. Each sink should be routed contained within a bin instead of the entire bounding box to speed things up */ 00830 static int mark_node_expansion_by_bin(int inet, int target_node, t_rt_node * rt_node) { 00831 int target_x, target_y; 00832 int rlim = 1; 00833 int inode; 00834 float area; 00835 boolean success; 00836 t_linked_rt_edge *linked_rt_edge; 00837 t_rt_node * child_node; 00838 00839 target_x = rr_node[target_node].xlow; 00840 target_y = rr_node[target_node].ylow; 00841 00842 if(clb_net[inet].num_sinks < HIGH_FANOUT_NET_LIM) { 00843 /* This algorithm only applies to high fanout nets */ 00844 return 1; 00845 } 00846 00847 area = (route_bb[inet].xmax - route_bb[inet].xmin) * (route_bb[inet].ymax - route_bb[inet].ymin); 00848 if(area <= 0) { 00849 area = 1; 00850 } 00851 00852 rlim = ceil(sqrt((float)area / (float)clb_net[inet].num_sinks)); 00853 if(rt_node == NULL || rt_node->u.child_list == NULL) { 00854 /* If unknown traceback, set radius of bin to be size of chip */ 00855 rlim = max(nx + 2, ny + 2); 00856 return rlim; 00857 } 00858 00859 success = FALSE; 00860 /* determine quickly a feasible bin radius to route sink for high fanout nets 00861 this is necessary to prevent super long runtimes for high fanout nets; in best case, a reduction in complexity from O(N^2logN) to O(NlogN) (Swartz fast router) 00862 */ 00863 linked_rt_edge = rt_node->u.child_list; 00864 while(success == FALSE && linked_rt_edge != NULL) { 00865 while(linked_rt_edge != NULL && success == FALSE) 00866 { 00867 child_node = linked_rt_edge->child; 00868 inode = child_node->inode; 00869 if(!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) { 00870 if(rr_node[inode].xlow <= target_x + rlim && 00871 rr_node[inode].xhigh >= target_x - rlim && 00872 rr_node[inode].ylow <= target_y + rlim && 00873 rr_node[inode].yhigh >= target_y - rlim) { 00874 success = TRUE; 00875 } 00876 } 00877 linked_rt_edge = linked_rt_edge->next; 00878 } 00879 00880 if(success == FALSE) { 00881 if(rlim > max(nx + 2, ny + 2)) { 00882 printf(ERRTAG "VPR internal error, net %s has paths that are not found in traceback\n", clb_net[inet].name); 00883 exit(1); 00884 } 00885 /* if sink not in bin, increase bin size until fit */ 00886 rlim *= 2; 00887 } else { 00888 /* Sometimes might just catch a wire in the end segment, need to give it some channel space to explore */ 00889 rlim += 4; 00890 } 00891 linked_rt_edge = rt_node->u.child_list; 00892 } 00893 00894 /* redetermine expansion based on rlim */ 00895 linked_rt_edge = rt_node->u.child_list; 00896 while(linked_rt_edge != NULL) 00897 { 00898 child_node = linked_rt_edge->child; 00899 inode = child_node->inode; 00900 if(!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) { 00901 if(rr_node[inode].xlow <= target_x + rlim && 00902 rr_node[inode].xhigh >= target_x - rlim && 00903 rr_node[inode].ylow <= target_y + rlim && 00904 rr_node[inode].yhigh >= target_y - rlim) { 00905 child_node->re_expand = TRUE; 00906 } else { 00907 child_node->re_expand = FALSE; 00908 } 00909 } 00910 linked_rt_edge = linked_rt_edge->next; 00911 } 00912 return rlim; 00913 } 00914 00915 00916 #define ERROR_TOL 0.0001 00917 00918 /** Checks that the net delays computed incrementally during timing driven 00919 * routing match those computed from scratch by the net_delay.c module. 00920 */ 00921 static void 00922 timing_driven_check_net_delays(float **net_delay) 00923 { 00924 int inet, ipin; 00925 float **net_delay_check; 00926 struct s_linked_vptr *ch_list_head_net_delay_check; 00927 00928 net_delay_check = alloc_net_delay(&ch_list_head_net_delay_check, clb_net, num_nets); 00929 load_net_delay_from_routing(net_delay_check, clb_net, num_nets); 00930 00931 for(inet = 0; inet < num_nets; inet++) 00932 { 00933 for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) 00934 { 00935 if(net_delay_check[inet][ipin] == 0.) 00936 { /* Should be only GLOBAL nets */ 00937 if(net_delay[inet][ipin] != 0.) 00938 { 00939 printf 00940 ("Error in timing_driven_check_net_delays: net %d pin %d." 00941 "\tIncremental calc. net_delay is %g, but from scratch " 00942 "net delay is %g.\n", inet, ipin, 00943 net_delay[inet][ipin], 00944 net_delay_check[inet][ipin]); 00945 exit(1); 00946 } 00947 } 00948 else 00949 { 00950 if(fabs 00951 (1. - 00952 net_delay[inet][ipin] / 00953 net_delay_check[inet][ipin]) > ERROR_TOL) 00954 { 00955 printf 00956 ("Error in timing_driven_check_net_delays: net %d pin %d." 00957 "\tIncremental calc. net_delay is %g, but from scratch " 00958 "net delay is %g.\n", inet, ipin, 00959 net_delay[inet][ipin], 00960 net_delay_check[inet][ipin]); 00961 exit(1); 00962 } 00963 } 00964 } 00965 } 00966 00967 free_net_delay(net_delay_check, &ch_list_head_net_delay_check); 00968 printf("Completed net delay value cross check successfully.\n"); 00969 }