VPR-6.0
|
00001 /*#include <stdlib.h> */ 00002 #include <stdio.h> 00003 #include <math.h> 00004 #include <assert.h> 00005 #include "util.h" 00006 #include "vpr_types.h" 00007 #include "globals.h" 00008 #include "mst.h" 00009 #include "place.h" 00010 #include "read_place.h" 00011 #include "draw.h" 00012 #include "place_and_route.h" 00013 #include "net_delay.h" 00014 #include "path_delay.h" 00015 #include "timing_place_lookup.h" 00016 #include "timing_place.h" 00017 #include "place_stats.h" 00018 #include "read_xml_arch_file.h" 00019 00020 /************** Types and defines local to place.c ***************************/ 00021 00022 #define SMALL_NET 4 /**< Cut off for incremental bounding box updates. 00023 * 4 is fastest -- I checked. */ 00024 00025 00026 /** For comp_cost. NORMAL means use the method that generates updateable 00027 * bounding boxes for speed. CHECK means compute all bounding boxes from 00028 * scratch using a very simple routine to allow checks of the other 00029 * costs. 00030 */ 00031 enum cost_methods 00032 { NORMAL, CHECK }; 00033 00034 #define FROM 0 /**< What block connected to a net has moved? */ 00035 #define TO 1 00036 #define FROM_AND_TO 2 00037 00038 #define ERROR_TOL .01 00039 #define MAX_MOVES_BEFORE_RECOMPUTE 50000 00040 00041 /********************** Variables local to place.c ***************************/ 00042 00043 /** Cost of a net, and a temporary cost of a net used during move assessment. */ 00044 static float *net_cost = NULL, *temp_net_cost = NULL; /**< [0..num_nets-1] */ 00045 00046 /**@{*/ 00047 /** [0..num_nets-1][1..num_pins-1]. What is the value of the timing 00048 * driven portion of the cost function. These arrays will be set to 00049 * (criticality * delay) for each point to point connection. 00050 */ 00051 static float **point_to_point_timing_cost = NULL; 00052 static float **temp_point_to_point_timing_cost = NULL; 00053 /**@}*/ 00054 00055 /**@{*/ 00056 /** [0..num_nets-1][1..num_pins-1]. What is the value of the delay 00057 * for each connection in the circuit */ 00058 static float **point_to_point_delay_cost = NULL; 00059 static float **temp_point_to_point_delay_cost = NULL; 00060 /**@}*/ 00061 00062 00063 /** [0..num_blocks-1][0..pins_per_clb-1]. Indicates which pin on the net 00064 * this block corresponds to, this is only required during timing-driven 00065 * placement. It is used to allow us to update individual connections on 00066 * each net */ 00067 static int **net_pin_index = NULL; 00068 00069 00070 /** [0..num_nets-1]. Store the bounding box coordinates and the number of 00071 * blocks on each of a net's bounding box (to allow efficient updates), 00072 * respectively. 00073 */ 00074 static struct s_bb *bb_coords = NULL, *bb_num_on_edges = NULL; 00075 00076 /** Stores the maximum and expected occupancies, plus the cost, of each 00077 * region in the placement. Used only by the NONLINEAR_CONG cost 00078 * function. [0..num_region-1][0..num_region-1]. Place_region_x and 00079 * y give the situation for the x and y directed channels, respectively. 00080 */ 00081 static struct s_place_region **place_region_x, **place_region_y; 00082 00083 /** Used only with nonlinear congestion. [0..num_regions]. */ 00084 static float *place_region_bounds_x, *place_region_bounds_y; 00085 00086 /** The arrays below are used to precompute the inverse of the average 00087 * number of tracks per channel between [subhigh] and [sublow]. Access 00088 * them as chan?_place_cost_fac[subhigh][sublow]. They are used to 00089 * speed up the computation of the cost function that takes the length 00090 * of the net bounding box in each dimension, divided by the average 00091 * number of tracks in that direction; for other cost functions they 00092 * will never be used. 00093 */ 00094 static float **chanx_place_cost_fac, **chany_place_cost_fac; 00095 00096 00097 /** Expected crossing counts for nets with different #'s of pins. From 00098 * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me). 00099 */ 00100 static const float cross_count[50] = { /**< [0..49] */ 00101 1.0, 1.0, 1.0, 1.0828, 1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493, 00102 1.4974, 1.5455, 1.5937, 1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519, 00103 1.8924, 00104 1.9288, 1.9652, 2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016, 00105 2.2334, 00106 2.2646, 2.2958, 2.3271, 2.3583, 2.3895, 2.4187, 2.4479, 2.4772, 2.5064, 00107 2.5356, 00108 2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671, 00109 2.7933 00110 }; 00111 00112 00113 /********************* Static subroutines local to place.c *******************/ 00114 00115 static void alloc_place_regions(int num_regions); 00116 00117 static void load_place_regions(int num_regions); 00118 00119 static void free_place_regions(int num_regions); 00120 00121 static void alloc_and_load_placement_structs(int place_cost_type, 00122 int num_regions, 00123 float place_cost_exp, 00124 float ***old_region_occ_x, 00125 float ***old_region_occ_y, 00126 struct s_placer_opts 00127 placer_opts); 00128 00129 static void free_placement_structs(int place_cost_type, 00130 int num_regions, 00131 float **old_region_occ_x, 00132 float **old_region_occ_y, 00133 struct s_placer_opts placer_opts); 00134 00135 static void alloc_and_load_for_fast_cost_update(float place_cost_exp); 00136 00137 static void initial_placement(enum e_pad_loc_type pad_loc_type, 00138 char *pad_loc_file); 00139 00140 static float comp_bb_cost(int method, 00141 int place_cost_type, 00142 int num_regions); 00143 00144 static int try_swap(float t, 00145 float *cost, 00146 float *bb_cost, 00147 float *timing_cost, 00148 float rlim, 00149 int place_cost_type, 00150 float **old_region_occ_x, 00151 float **old_region_occ_y, 00152 int num_regions, 00153 boolean fixed_pins, 00154 enum e_place_algorithm place_algorithm, 00155 float timing_tradeoff, 00156 float inverse_prev_bb_cost, 00157 float inverse_prev_timing_cost, 00158 float *delay_cost, 00159 int *x_lookup); 00160 00161 static void check_place(float bb_cost, 00162 float timing_cost, 00163 int place_cost_type, 00164 int num_regions, 00165 enum e_place_algorithm place_algorithm, 00166 float delay_cost); 00167 00168 static float starting_t(float *cost_ptr, 00169 float *bb_cost_ptr, 00170 float *timing_cost_ptr, 00171 int place_cost_type, 00172 float **old_region_occ_x, 00173 float **old_region_occ_y, 00174 int num_regions, 00175 boolean fixed_pins, 00176 struct s_annealing_sched annealing_sched, 00177 int max_moves, 00178 float rlim, 00179 enum e_place_algorithm place_algorithm, 00180 float timing_tradeoff, 00181 float inverse_prev_bb_cost, 00182 float inverse_prev_timing_cost, 00183 float *delay_cost_ptr); 00184 00185 00186 static void update_t(float *t, 00187 float std_dev, 00188 float rlim, 00189 float success_rat, 00190 struct s_annealing_sched annealing_sched); 00191 00192 static void update_rlim(float *rlim, 00193 float success_rat); 00194 00195 static int exit_crit(float t, 00196 float cost, 00197 struct s_annealing_sched annealing_sched); 00198 00199 static int count_connections(void); 00200 00201 static void compute_net_pin_index_values(void); 00202 00203 static double get_std_dev(int n, 00204 double sum_x_squared, 00205 double av_x); 00206 00207 static void free_fast_cost_update_structs(void); 00208 00209 static float recompute_bb_cost(int place_cost_type, 00210 int num_regions); 00211 00212 static float comp_td_point_to_point_delay(int inet, 00213 int ipin); 00214 00215 static void update_td_cost(int b_from, 00216 int b_to, 00217 int num_of_pins); 00218 00219 static void comp_delta_td_cost(int b_from, 00220 int b_to, 00221 int num_of_pins, 00222 float *delta_timing, 00223 float *delta_delay); 00224 00225 static void comp_td_costs(float *timing_cost, 00226 float *connection_delay_sum); 00227 00228 static int assess_swap(float delta_c, 00229 float t); 00230 00231 static boolean find_to(int x_from, 00232 int y_from, 00233 t_type_ptr type, 00234 float rlim, 00235 int *x_lookup, 00236 int *x_to, 00237 int *y_to); 00238 00239 static void get_non_updateable_bb(int inet, 00240 struct s_bb *bb_coord_new); 00241 00242 static void update_bb(int inet, 00243 struct s_bb *bb_coord_new, 00244 struct s_bb *bb_edge_new, 00245 int xold, 00246 int yold, 00247 int xnew, 00248 int ynew); 00249 00250 static int find_affected_nets(int *nets_to_update, 00251 int *net_block_moved, 00252 int b_from, 00253 int b_to, 00254 int num_of_pins); 00255 00256 static float get_net_cost(int inet, 00257 struct s_bb *bb_ptr); 00258 00259 static float nonlinear_cong_cost(int num_regions); 00260 00261 static void update_region_occ(int inet, 00262 struct s_bb *coords, 00263 int add_or_sub, 00264 int num_regions); 00265 00266 static void save_region_occ(float **old_region_occ_x, 00267 float **old_region_occ_y, 00268 int num_regions); 00269 00270 static void restore_region_occ(float **old_region_occ_x, 00271 float **old_region_occ_y, 00272 int num_regions); 00273 00274 static void get_bb_from_scratch(int inet, 00275 struct s_bb *coords, 00276 struct s_bb *num_on_edges); 00277 00278 static double get_net_wirelength_estimate(int inet, 00279 struct s_bb *bbptr); 00280 00281 /*****************************************************************************/ 00282 /* RESEARCH TODO: Bounding Box and rlim need to be redone for heterogeneous to prevent a QoR penalty */ 00283 /** Does almost all the work of placing a circuit. Width_fac gives the 00284 * width of the widest channel. Place_cost_exp says what exponent the 00285 * width should be taken to when calculating costs. This allows a 00286 * greater bias for anisotropic architectures. Place_cost_type 00287 * determines which cost function is used. num_regions is used only 00288 * the place_cost_type is NONLINEAR_CONG. 00289 */ 00290 void 00291 try_place(struct s_placer_opts placer_opts, 00292 struct s_annealing_sched annealing_sched, 00293 t_chan_width_dist chan_width_dist, 00294 struct s_router_opts router_opts, 00295 struct s_det_routing_arch det_routing_arch, 00296 t_segment_inf * segment_inf, 00297 t_timing_inf timing_inf, 00298 t_mst_edge *** mst) 00299 { 00300 int tot_iter, inner_iter, success_sum; 00301 int move_lim, moves_since_cost_recompute, width_fac; 00302 float t, success_rat, rlim, d_max, est_crit; 00303 float cost, timing_cost, bb_cost, new_bb_cost, new_timing_cost; 00304 float delay_cost, new_delay_cost, place_delay_value; 00305 float inverse_prev_bb_cost, inverse_prev_timing_cost; 00306 float oldt; 00307 double av_cost, av_bb_cost, av_timing_cost, av_delay_cost, 00308 sum_of_squares, std_dev; 00309 float **old_region_occ_x, **old_region_occ_y; 00310 char msg[BUFSIZE]; 00311 boolean fixed_pins; /* Can pads move or not? */ 00312 int num_connections; 00313 int inet, ipin, outer_crit_iter_count, inner_crit_iter_count, 00314 inner_recompute_limit; 00315 float **net_slack, **net_delay; 00316 float crit_exponent; 00317 float first_rlim, final_rlim, inverse_delta_rlim; 00318 float **remember_net_delay_original_ptr; /*used to free net_delay if it is re-assigned */ 00319 00320 int *x_lookup; /* Used to quickly determine valid swap columns */ 00321 00322 /* Allocated here because it goes into timing critical code where each memory allocation is expensive */ 00323 x_lookup = my_malloc(nx * sizeof(int)); 00324 net_delay = net_slack = NULL; 00325 00326 remember_net_delay_original_ptr = NULL; /*prevents compiler warning */ 00327 00328 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00329 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || 00330 placer_opts.enable_timing_computations) 00331 { 00332 /*do this before the initial placement to avoid messing up the initial placement */ 00333 alloc_lookups_and_criticalities(chan_width_dist, 00334 router_opts, 00335 det_routing_arch, 00336 segment_inf, 00337 timing_inf, 00338 &net_delay, &net_slack); 00339 00340 remember_net_delay_original_ptr = net_delay; 00341 00342 /*#define PRINT_LOWER_BOUND */ 00343 #ifdef PRINT_LOWER_BOUND 00344 /*print the crit_path, assuming delay between blocks that are* 00345 *block_dist apart*/ 00346 00347 if(placer_opts.block_dist <= nx) 00348 place_delay_value = 00349 delta_clb_to_clb[placer_opts.block_dist][0]; 00350 else if(placer_opts.block_dist <= ny) 00351 place_delay_value = 00352 delta_clb_to_clb[0][placer_opts.block_dist]; 00353 else 00354 place_delay_value = delta_clb_to_clb[nx][ny]; 00355 00356 printf("\nLower bound assuming delay of %g\n", place_delay_value); 00357 00358 load_constant_net_delay(net_delay, place_delay_value); 00359 load_timing_graph_net_delays(net_delay); 00360 d_max = load_net_slack(net_slack, 0); 00361 00362 #ifdef CREATE_ECHO_FILES 00363 print_critical_path("Placement_Lower_Bound.echo"); 00364 print_sink_delays("Placement_Lower_Bound_Sink_Delays.echo"); 00365 #endif /* CREATE_ECHO_FILES */ 00366 00367 /*also print sink delays assuming 0 delay between blocks, 00368 * this tells us how much logic delay is on each path */ 00369 00370 load_constant_net_delay(net_delay, 0); 00371 load_timing_graph_net_delays(net_delay); 00372 d_max = load_net_slack(net_slack, 0); 00373 00374 #ifdef CREATE_ECHO_FILES 00375 print_sink_delays("Placement_Logic_Sink_Delays.echo"); 00376 #endif /* CREATE_ECHO_FILES */ 00377 #endif 00378 00379 } 00380 00381 width_fac = placer_opts.place_chan_width; 00382 if(placer_opts.pad_loc_type == FREE) 00383 fixed_pins = FALSE; 00384 else 00385 fixed_pins = TRUE; 00386 00387 init_chan(width_fac, chan_width_dist); 00388 00389 alloc_and_load_placement_structs(placer_opts.place_cost_type, 00390 placer_opts.num_regions, 00391 placer_opts.place_cost_exp, 00392 &old_region_occ_x, &old_region_occ_y, 00393 placer_opts); 00394 00395 initial_placement(placer_opts.pad_loc_type, placer_opts.pad_loc_file); 00396 init_draw_coords((float)width_fac); 00397 00398 /* Storing the number of pins on each type of block makes the swap routine * 00399 * slightly more efficient. */ 00400 00401 /* Gets initial cost and loads bounding boxes. */ 00402 00403 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00404 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) 00405 { 00406 bb_cost = comp_bb_cost(NORMAL, placer_opts.place_cost_type, 00407 placer_opts.num_regions); 00408 00409 crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */ 00410 00411 compute_net_pin_index_values(); 00412 00413 num_connections = count_connections(); 00414 printf 00415 ("\nThere are %d point to point connections in this circuit\n\n", 00416 num_connections); 00417 00418 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) 00419 { 00420 for(inet = 0; inet < num_nets; inet++) 00421 for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) 00422 timing_place_crit[inet][ipin] = 0; /*dummy crit values */ 00423 00424 comp_td_costs(&timing_cost, &delay_cost); /*first pass gets delay_cost, which is used 00425 * in criticality computations in the next call 00426 * to comp_td_costs. */ 00427 place_delay_value = delay_cost / num_connections; /*used for computing criticalities */ 00428 load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets); 00429 00430 } 00431 else 00432 place_delay_value = 0; 00433 00434 00435 if(placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) 00436 { 00437 net_delay = point_to_point_delay_cost; /*this keeps net_delay up to date with * 00438 * *the same values that the placer is using * 00439 * *point_to_point_delay_cost is computed each* 00440 * *time that comp_td_costs is called, and is * 00441 * *also updated after any swap is accepted */ 00442 } 00443 00444 00445 load_timing_graph_net_delays(net_delay); 00446 d_max = load_net_slack(net_slack, 0); 00447 load_criticalities(placer_opts, net_slack, d_max, crit_exponent); 00448 outer_crit_iter_count = 1; 00449 00450 /*now we can properly compute costs */ 00451 comp_td_costs(&timing_cost, &delay_cost); /*also puts proper values into point_to_point_delay_cost */ 00452 00453 inverse_prev_timing_cost = 1 / timing_cost; 00454 inverse_prev_bb_cost = 1 / bb_cost; 00455 cost = 1; /*our new cost function uses normalized values of */ 00456 /*bb_cost and timing_cost, the value of cost will be reset */ 00457 /*to 1 at each temperature when *_TIMING_DRIVEN_PLACE is true */ 00458 } 00459 else 00460 { /*BOUNDING_BOX_PLACE */ 00461 cost = bb_cost = comp_bb_cost(NORMAL, placer_opts.place_cost_type, 00462 placer_opts.num_regions); 00463 timing_cost = 0; 00464 delay_cost = 0; 00465 place_delay_value = 0; 00466 outer_crit_iter_count = 0; 00467 num_connections = 0; 00468 d_max = 0; 00469 crit_exponent = 0; 00470 00471 inverse_prev_timing_cost = 0; /*inverses not used */ 00472 inverse_prev_bb_cost = 0; 00473 } 00474 00475 move_lim = (int)(annealing_sched.inner_num * pow(num_blocks, 1.3333)); 00476 00477 if(placer_opts.inner_loop_recompute_divider != 0) 00478 inner_recompute_limit = (int)(0.5 + (float)move_lim / 00479 (float)placer_opts. 00480 inner_loop_recompute_divider); 00481 else /*don't do an inner recompute */ 00482 inner_recompute_limit = move_lim + 1; 00483 00484 00485 /* Sometimes I want to run the router with a random placement. Avoid * 00486 * using 0 moves to stop division by 0 and 0 length vector problems, * 00487 * by setting move_lim to 1 (which is still too small to do any * 00488 * significant optimization). */ 00489 00490 if(move_lim <= 0) 00491 move_lim = 1; 00492 00493 rlim = (float)max(nx, ny); 00494 00495 first_rlim = rlim; /*used in timing-driven placement for exponent computation */ 00496 final_rlim = 1; 00497 inverse_delta_rlim = 1 / (first_rlim - final_rlim); 00498 00499 t = starting_t(&cost, &bb_cost, &timing_cost, 00500 placer_opts.place_cost_type, 00501 old_region_occ_x, old_region_occ_y, 00502 placer_opts.num_regions, fixed_pins, annealing_sched, 00503 move_lim, rlim, placer_opts.place_algorithm, 00504 placer_opts.timing_tradeoff, inverse_prev_bb_cost, 00505 inverse_prev_timing_cost, &delay_cost); 00506 tot_iter = 0; 00507 moves_since_cost_recompute = 0; 00508 printf 00509 ("Initial Placement Cost: %g bb_cost: %g td_cost: %g delay_cost: %g\n\n", 00510 cost, bb_cost, timing_cost, delay_cost); 00511 00512 #ifndef SPEC 00513 printf 00514 ("%11s %10s %11s %11s %11s %11s %11s %9s %8s %7s %7s %10s %7s\n", 00515 "T", "Cost", "Av. BB Cost", "Av. TD Cost", "Av Tot Del", 00516 "P to P Del", "d_max", "Ac Rate", "Std Dev", "R limit", "Exp", 00517 "Tot. Moves", "Alpha"); 00518 printf 00519 ("%11s %10s %11s %11s %11s %11s %11s %9s %8s %7s %7s %10s %7s\n", 00520 "--------", "----------", "-----------", "-----------", 00521 "---------", "----------", "-----", "-------", "-------", 00522 "-------", "-------", "----------", "-----"); 00523 #endif 00524 00525 sprintf(msg, 00526 "Initial Placement. Cost: %g BB Cost: %g TD Cost %g Delay Cost: %g " 00527 "\t d_max %g Channel Factor: %d", cost, bb_cost, timing_cost, 00528 delay_cost, d_max, width_fac); 00529 update_screen(MAJOR, msg, PLACEMENT, FALSE); 00530 00531 while(exit_crit(t, cost, annealing_sched) == 0) 00532 { 00533 00534 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00535 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) 00536 { 00537 cost = 1; 00538 } 00539 00540 av_cost = 0.; 00541 av_bb_cost = 0.; 00542 av_delay_cost = 0.; 00543 av_timing_cost = 0.; 00544 sum_of_squares = 0.; 00545 success_sum = 0; 00546 00547 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00548 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) 00549 { 00550 00551 if(outer_crit_iter_count >= 00552 placer_opts.recompute_crit_iter 00553 || placer_opts.inner_loop_recompute_divider != 0) 00554 { 00555 #ifdef VERBOSE 00556 printf("Outer Loop Recompute Criticalities\n"); 00557 #endif 00558 place_delay_value = delay_cost / num_connections; 00559 00560 if(placer_opts.place_algorithm == 00561 NET_TIMING_DRIVEN_PLACE) 00562 load_constant_net_delay(net_delay, 00563 place_delay_value, clb_net, num_nets); 00564 /*note, for path_based, the net delay is not updated since it is current, 00565 *because it accesses point_to_point_delay array */ 00566 00567 load_timing_graph_net_delays(net_delay); 00568 d_max = load_net_slack(net_slack, 0); 00569 load_criticalities(placer_opts, net_slack, d_max, 00570 crit_exponent); 00571 /*recompute costs from scratch, based on new criticalities */ 00572 comp_td_costs(&timing_cost, &delay_cost); 00573 outer_crit_iter_count = 0; 00574 } 00575 outer_crit_iter_count++; 00576 00577 /*at each temperature change we update these values to be used */ 00578 /*for normalizing the tradeoff between timing and wirelength (bb) */ 00579 inverse_prev_bb_cost = 1 / bb_cost; 00580 inverse_prev_timing_cost = 1 / timing_cost; 00581 } 00582 00583 inner_crit_iter_count = 1; 00584 00585 for(inner_iter = 0; inner_iter < move_lim; inner_iter++) 00586 { 00587 if(try_swap(t, &cost, &bb_cost, &timing_cost, 00588 rlim, placer_opts.place_cost_type, 00589 old_region_occ_x, old_region_occ_y, 00590 placer_opts.num_regions, fixed_pins, 00591 placer_opts.place_algorithm, 00592 placer_opts.timing_tradeoff, 00593 inverse_prev_bb_cost, 00594 inverse_prev_timing_cost, &delay_cost, 00595 x_lookup) == 1) 00596 { 00597 success_sum++; 00598 av_cost += cost; 00599 av_bb_cost += bb_cost; 00600 av_timing_cost += timing_cost; 00601 av_delay_cost += delay_cost; 00602 sum_of_squares += cost * cost; 00603 } 00604 00605 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE 00606 || placer_opts.place_algorithm == 00607 PATH_TIMING_DRIVEN_PLACE) 00608 { 00609 00610 if(inner_crit_iter_count >= inner_recompute_limit 00611 && inner_iter != move_lim - 1) 00612 { /*on last iteration don't recompute */ 00613 00614 inner_crit_iter_count = 0; 00615 #ifdef VERBOSE 00616 printf 00617 ("Inner Loop Recompute Criticalities\n"); 00618 #endif 00619 if(placer_opts.place_algorithm == 00620 NET_TIMING_DRIVEN_PLACE) 00621 { 00622 place_delay_value = 00623 delay_cost / num_connections; 00624 load_constant_net_delay(net_delay, 00625 place_delay_value, clb_net, num_nets); 00626 } 00627 00628 load_timing_graph_net_delays(net_delay); 00629 d_max = load_net_slack(net_slack, 0); 00630 load_criticalities(placer_opts, net_slack, 00631 d_max, crit_exponent); 00632 comp_td_costs(&timing_cost, &delay_cost); 00633 } 00634 inner_crit_iter_count++; 00635 } 00636 #ifdef VERBOSE 00637 printf 00638 ("t = %g cost = %g bb_cost = %g timing_cost = %g move = %d dmax = %g\n", 00639 t, cost, bb_cost, timing_cost, inner_iter, d_max); 00640 if(fabs 00641 (bb_cost - 00642 comp_bb_cost(CHECK, placer_opts.place_cost_type, 00643 placer_opts.num_regions)) > 00644 bb_cost * ERROR_TOL) 00645 exit(1); 00646 #endif 00647 } 00648 00649 /* Lines below prevent too much round-off error from accumulating * 00650 * in the cost over many iterations. This round-off can lead to * 00651 * error checks failing because the cost is different from what * 00652 * you get when you recompute from scratch. */ 00653 00654 moves_since_cost_recompute += move_lim; 00655 if(moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE) 00656 { 00657 new_bb_cost = 00658 recompute_bb_cost(placer_opts.place_cost_type, 00659 placer_opts.num_regions); 00660 if(fabs(new_bb_cost - bb_cost) > bb_cost * ERROR_TOL) 00661 { 00662 printf 00663 ("Error in try_place: new_bb_cost = %g, old bb_cost = %g.\n", 00664 new_bb_cost, bb_cost); 00665 exit(1); 00666 } 00667 bb_cost = new_bb_cost; 00668 00669 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE 00670 || placer_opts.place_algorithm == 00671 PATH_TIMING_DRIVEN_PLACE) 00672 { 00673 comp_td_costs(&new_timing_cost, &new_delay_cost); 00674 if(fabs(new_timing_cost - timing_cost) > 00675 timing_cost * ERROR_TOL) 00676 { 00677 printf 00678 ("Error in try_place: new_timing_cost = %g, old timing_cost = %g.\n", 00679 new_timing_cost, timing_cost); 00680 exit(1); 00681 } 00682 if(fabs(new_delay_cost - delay_cost) > 00683 delay_cost * ERROR_TOL) 00684 { 00685 printf 00686 ("Error in try_place: new_delay_cost = %g, old delay_cost = %g.\n", 00687 new_delay_cost, delay_cost); 00688 exit(1); 00689 } 00690 timing_cost = new_timing_cost; 00691 } 00692 00693 if(placer_opts.place_algorithm == BOUNDING_BOX_PLACE) 00694 { 00695 cost = new_bb_cost; 00696 } 00697 moves_since_cost_recompute = 0; 00698 } 00699 00700 tot_iter += move_lim; 00701 success_rat = ((float)success_sum) / move_lim; 00702 if(success_sum == 0) 00703 { 00704 av_cost = cost; 00705 av_bb_cost = bb_cost; 00706 av_timing_cost = timing_cost; 00707 av_delay_cost = delay_cost; 00708 } 00709 else 00710 { 00711 av_cost /= success_sum; 00712 av_bb_cost /= success_sum; 00713 av_timing_cost /= success_sum; 00714 av_delay_cost /= success_sum; 00715 } 00716 std_dev = get_std_dev(success_sum, sum_of_squares, av_cost); 00717 00718 #ifndef SPEC 00719 printf 00720 ("%11.5g %10.6g %11.6g %11.6g %11.6g %11.6g %11.4g %9.4g %8.3g %7.4g %7.4g %10d ", 00721 t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost, 00722 place_delay_value, d_max, success_rat, std_dev, rlim, 00723 crit_exponent, tot_iter); 00724 #endif 00725 00726 oldt = t; /* for finding and printing alpha. */ 00727 update_t(&t, std_dev, rlim, success_rat, annealing_sched); 00728 00729 #ifndef SPEC 00730 printf("%7.4g\n", t / oldt); 00731 fflush(stdout); 00732 #endif 00733 00734 sprintf(msg, 00735 "Cost: %g BB Cost %g TD Cost %g Temperature: %g d_max: %g", 00736 cost, bb_cost, timing_cost, t, d_max); 00737 update_screen(MINOR, msg, PLACEMENT, FALSE); 00738 update_rlim(&rlim, success_rat); 00739 00740 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00741 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) 00742 { 00743 crit_exponent = 00744 (1 - 00745 (rlim - 00746 final_rlim) * inverse_delta_rlim) * 00747 (placer_opts.td_place_exp_last - 00748 placer_opts.td_place_exp_first) + 00749 placer_opts.td_place_exp_first; 00750 } 00751 #ifdef VERBOSE 00752 dump_clbs(); 00753 #endif 00754 } 00755 00756 t = 0; /* freeze out */ 00757 av_cost = 0.; 00758 av_bb_cost = 0.; 00759 av_timing_cost = 0.; 00760 sum_of_squares = 0.; 00761 av_delay_cost = 0.; 00762 success_sum = 0; 00763 00764 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00765 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) 00766 { 00767 /*at each temperature change we update these values to be used */ 00768 /*for normalizing the tradeoff between timing and wirelength (bb) */ 00769 if(outer_crit_iter_count >= placer_opts.recompute_crit_iter || 00770 placer_opts.inner_loop_recompute_divider != 0) 00771 { 00772 00773 #ifdef VERBOSE 00774 printf("Outer Loop Recompute Criticalities\n"); 00775 #endif 00776 place_delay_value = delay_cost / num_connections; 00777 00778 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) 00779 load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets); 00780 00781 load_timing_graph_net_delays(net_delay); 00782 d_max = load_net_slack(net_slack, 0); 00783 load_criticalities(placer_opts, net_slack, d_max, 00784 crit_exponent); 00785 /*recompute criticaliies */ 00786 comp_td_costs(&timing_cost, &delay_cost); 00787 outer_crit_iter_count = 0; 00788 } 00789 outer_crit_iter_count++; 00790 00791 inverse_prev_bb_cost = 1 / (bb_cost); 00792 inverse_prev_timing_cost = 1 / (timing_cost); 00793 } 00794 00795 inner_crit_iter_count = 1; 00796 00797 for(inner_iter = 0; inner_iter < move_lim; inner_iter++) 00798 { 00799 if(try_swap(t, &cost, &bb_cost, &timing_cost, 00800 rlim, placer_opts.place_cost_type, 00801 old_region_occ_x, old_region_occ_y, 00802 placer_opts.num_regions, fixed_pins, 00803 placer_opts.place_algorithm, 00804 placer_opts.timing_tradeoff, inverse_prev_bb_cost, 00805 inverse_prev_timing_cost, &delay_cost, x_lookup) == 1) 00806 { 00807 success_sum++; 00808 av_cost += cost; 00809 av_bb_cost += bb_cost; 00810 av_delay_cost += delay_cost; 00811 av_timing_cost += timing_cost; 00812 sum_of_squares += cost * cost; 00813 00814 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE 00815 || placer_opts.place_algorithm == 00816 PATH_TIMING_DRIVEN_PLACE) 00817 { 00818 00819 if(inner_crit_iter_count >= inner_recompute_limit 00820 && inner_iter != move_lim - 1) 00821 { 00822 00823 inner_crit_iter_count = 0; 00824 #ifdef VERBOSE 00825 printf 00826 ("Inner Loop Recompute Criticalities\n"); 00827 #endif 00828 if(placer_opts.place_algorithm == 00829 NET_TIMING_DRIVEN_PLACE) 00830 { 00831 place_delay_value = 00832 delay_cost / num_connections; 00833 load_constant_net_delay(net_delay, 00834 place_delay_value, clb_net, num_nets); 00835 } 00836 00837 load_timing_graph_net_delays(net_delay); 00838 d_max = load_net_slack(net_slack, 0); 00839 load_criticalities(placer_opts, net_slack, 00840 d_max, crit_exponent); 00841 comp_td_costs(&timing_cost, &delay_cost); 00842 } 00843 inner_crit_iter_count++; 00844 } 00845 } 00846 #ifdef VERBOSE 00847 printf("t = %g cost = %g move = %d\n", t, cost, tot_iter); 00848 #endif 00849 } 00850 tot_iter += move_lim; 00851 success_rat = ((float)success_sum) / move_lim; 00852 if(success_sum == 0) 00853 { 00854 av_cost = cost; 00855 av_bb_cost = bb_cost; 00856 av_delay_cost = delay_cost; 00857 av_timing_cost = timing_cost; 00858 } 00859 else 00860 { 00861 av_cost /= success_sum; 00862 av_bb_cost /= success_sum; 00863 av_delay_cost /= success_sum; 00864 av_timing_cost /= success_sum; 00865 } 00866 00867 std_dev = get_std_dev(success_sum, sum_of_squares, av_cost); 00868 00869 00870 #ifndef SPEC 00871 printf 00872 ("%11.5g %10.6g %11.6g %11.6g %11.6g %11.6g %11.4g %9.4g %8.3g %7.4g %7.4g %10d \n\n", 00873 t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost, 00874 place_delay_value, d_max, success_rat, std_dev, rlim, 00875 crit_exponent, tot_iter); 00876 00877 #endif 00878 00879 #ifdef VERBOSE 00880 dump_clbs(); 00881 #endif 00882 00883 check_place(bb_cost, timing_cost, placer_opts.place_cost_type, 00884 placer_opts.num_regions, placer_opts.place_algorithm, 00885 delay_cost); 00886 00887 if(placer_opts.enable_timing_computations && 00888 placer_opts.place_algorithm == BOUNDING_BOX_PLACE) 00889 { 00890 /*need this done since the timing data has not been kept up to date* 00891 *in bounding_box mode */ 00892 for(inet = 0; inet < num_nets; inet++) 00893 for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) 00894 timing_place_crit[inet][ipin] = 0; /*dummy crit values */ 00895 comp_td_costs(&timing_cost, &delay_cost); /*computes point_to_point_delay_cost */ 00896 } 00897 00898 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00899 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || 00900 placer_opts.enable_timing_computations) 00901 { 00902 net_delay = point_to_point_delay_cost; /*this makes net_delay up to date with * 00903 *the same values that the placer is using*/ 00904 load_timing_graph_net_delays(net_delay); 00905 est_crit = load_net_slack(net_slack, 0); 00906 #ifdef CREATE_ECHO_FILES 00907 /* print_sink_delays("placement_sink_delays.echo"); */ 00908 print_net_slack("placement_net_slacks.echo", net_slack); 00909 print_critical_path("placement_crit_path.echo"); 00910 #endif /* CREATE_ECHO_FILES */ 00911 printf("Placement Estimated Crit Path Delay: %g\n\n", est_crit); 00912 } 00913 00914 00915 sprintf(msg, 00916 "Placement. Cost: %g bb_cost: %g td_cost: %g Channel Factor: %d d_max: %g", 00917 cost, bb_cost, timing_cost, width_fac, d_max); 00918 printf 00919 ("Placement. Cost: %g bb_cost: %g td_cost: %g delay_cost: %g.\n", 00920 cost, bb_cost, timing_cost, delay_cost); 00921 update_screen(MAJOR, msg, PLACEMENT, FALSE); 00922 00923 #ifdef SPEC 00924 printf("Total moves attempted: %d.0\n", tot_iter); 00925 #endif 00926 00927 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00928 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || 00929 placer_opts.enable_timing_computations) 00930 { 00931 00932 net_delay = remember_net_delay_original_ptr; 00933 00934 free_placement_structs(placer_opts.place_cost_type, 00935 placer_opts.num_regions, old_region_occ_x, 00936 old_region_occ_y, placer_opts); 00937 free_lookups_and_criticalities(&net_delay, &net_slack); 00938 } 00939 00940 /* placement is done - find mst of all nets. 00941 * creating mst for each net; this gives me an ordering of sinks 00942 * by which I will direct search (A*) for. */ 00943 if(*mst) 00944 { 00945 for(inet = 0; inet < num_nets; inet++) 00946 { 00947 assert((*mst)[inet]); 00948 free((*mst)[inet]); 00949 } 00950 free(*mst); 00951 } 00952 *mst = (t_mst_edge **) my_malloc(sizeof(t_mst_edge *) * num_nets); 00953 for(inet = 0; inet < num_nets; inet++) 00954 { 00955 (*mst)[inet] = get_mst_of_net(inet); 00956 } 00957 free(x_lookup); 00958 } 00959 00960 /** only count non-global connections */ 00961 static int 00962 count_connections() 00963 { 00964 00965 int count, inet; 00966 00967 count = 0; 00968 00969 for(inet = 0; inet < num_nets; inet++) 00970 { 00971 00972 if(clb_net[inet].is_global) 00973 continue; 00974 00975 count += clb_net[inet].num_sinks; 00976 } 00977 return (count); 00978 } 00979 00980 /** computes net_pin_index array, this array allows us to quickly 00981 * find what pin on the net a block pin corresponds to */ 00982 static void 00983 compute_net_pin_index_values() 00984 { 00985 int inet, netpin, blk, iblk, ipin; 00986 t_type_ptr type; 00987 00988 /*initialize values to OPEN */ 00989 for(iblk = 0; iblk < num_blocks; iblk++) 00990 { 00991 type = block[iblk].type; 00992 for(ipin = 0; ipin < type->num_pins; ipin++) 00993 { 00994 net_pin_index[iblk][ipin] = OPEN; 00995 } 00996 } 00997 00998 for(inet = 0; inet < num_nets; inet++) 00999 { 01000 01001 if(clb_net[inet].is_global) 01002 continue; 01003 01004 for(netpin = 0; netpin <= clb_net[inet].num_sinks; netpin++) 01005 { 01006 blk = clb_net[inet].node_block[netpin]; 01007 net_pin_index[blk][clb_net[inet].node_block_pin[netpin]] = 01008 netpin; 01009 } 01010 } 01011 } 01012 01013 01014 /** Returns the standard deviation of data set x. There are n sample points, 01015 * sum_x_squared is the summation over n of x^2 and av_x is the average x. 01016 * All operations are done in double precision, since round off error can be 01017 * a problem in the initial temp. std_dev calculation for big circuits. 01018 */ 01019 static double 01020 get_std_dev(int n, 01021 double sum_x_squared, 01022 double av_x) 01023 { 01024 01025 double std_dev; 01026 01027 if(n <= 1) 01028 std_dev = 0.; 01029 else 01030 std_dev = (sum_x_squared - n * av_x * av_x) / (double)(n - 1); 01031 01032 if(std_dev > 0.) /* Very small variances sometimes round negative */ 01033 std_dev = sqrt(std_dev); 01034 else 01035 std_dev = 0.; 01036 01037 return (std_dev); 01038 } 01039 01040 01041 /** Update the range limited to keep acceptance prob. near 0.44. Use 01042 * a floating point rlim to allow gradual transitions at low temps. 01043 */ 01044 static void 01045 update_rlim(float *rlim, 01046 float success_rat) 01047 { 01048 float upper_lim; 01049 01050 *rlim = (*rlim) * (1. - 0.44 + success_rat); 01051 upper_lim = max(nx, ny); 01052 *rlim = min(*rlim, upper_lim); 01053 *rlim = max(*rlim, 1.); 01054 } 01055 01056 01057 /** Update the temperature according to the annealing schedule selected. */ 01058 static void 01059 update_t(float *t, 01060 float std_dev, 01061 float rlim, 01062 float success_rat, 01063 struct s_annealing_sched annealing_sched) 01064 { 01065 01066 /* float fac; */ 01067 01068 if(annealing_sched.type == USER_SCHED) 01069 { 01070 *t = annealing_sched.alpha_t * (*t); 01071 } 01072 01073 /* Old standard deviation based stuff is below. This bogs down horribly 01074 * for big circuits (alu4 and especially bigkey_mod). */ 01075 /* #define LAMBDA .7 */ 01076 /* ------------------------------------ */ 01077 #if 0 01078 else if(std_dev == 0.) 01079 { 01080 *t = 0.; 01081 } 01082 else 01083 { 01084 fac = exp(-LAMBDA * (*t) / std_dev); 01085 fac = max(0.5, fac); 01086 *t = (*t) * fac; 01087 } 01088 #endif 01089 /* ------------------------------------- */ 01090 01091 else 01092 { /* AUTO_SCHED */ 01093 if(success_rat > 0.96) 01094 { 01095 *t = (*t) * 0.5; 01096 } 01097 else if(success_rat > 0.8) 01098 { 01099 *t = (*t) * 0.9; 01100 } 01101 else if(success_rat > 0.15 || rlim > 1.) 01102 { 01103 *t = (*t) * 0.95; 01104 } 01105 else 01106 { 01107 *t = (*t) * 0.8; 01108 } 01109 } 01110 } 01111 01112 01113 /** Return 1 when the exit criterion is met. */ 01114 static int 01115 exit_crit(float t, 01116 float cost, 01117 struct s_annealing_sched annealing_sched) 01118 { 01119 if(annealing_sched.type == USER_SCHED) 01120 { 01121 if(t < annealing_sched.exit_t) 01122 { 01123 return (1); 01124 } 01125 else 01126 { 01127 return (0); 01128 } 01129 } 01130 01131 /* Automatic annealing schedule */ 01132 01133 if(t < 0.005 * cost / num_nets) 01134 { 01135 return (1); 01136 } 01137 else 01138 { 01139 return (0); 01140 } 01141 } 01142 01143 01144 /** Finds the starting temperature (hot condition). */ 01145 static float 01146 starting_t(float *cost_ptr, 01147 float *bb_cost_ptr, 01148 float *timing_cost_ptr, 01149 int place_cost_type, 01150 float **old_region_occ_x, 01151 float **old_region_occ_y, 01152 int num_regions, 01153 boolean fixed_pins, 01154 struct s_annealing_sched annealing_sched, 01155 int max_moves, 01156 float rlim, 01157 enum e_place_algorithm place_algorithm, 01158 float timing_tradeoff, 01159 float inverse_prev_bb_cost, 01160 float inverse_prev_timing_cost, 01161 float *delay_cost_ptr) 01162 { 01163 int i, num_accepted, move_lim; 01164 double std_dev, av, sum_of_squares; /* Double important to avoid round off */ 01165 int *x_lookup; 01166 01167 x_lookup = (int *)my_malloc(nx * sizeof(int)); 01168 01169 if(annealing_sched.type == USER_SCHED) 01170 return (annealing_sched.init_t); 01171 01172 move_lim = min(max_moves, num_blocks); 01173 01174 num_accepted = 0; 01175 av = 0.; 01176 sum_of_squares = 0.; 01177 01178 /* Try one move per block. Set t high so essentially all accepted. */ 01179 01180 for(i = 0; i < move_lim; i++) 01181 { 01182 if(try_swap(1.e30, cost_ptr, bb_cost_ptr, timing_cost_ptr, rlim, 01183 place_cost_type, 01184 old_region_occ_x, old_region_occ_y, num_regions, 01185 fixed_pins, place_algorithm, timing_tradeoff, 01186 inverse_prev_bb_cost, inverse_prev_timing_cost, 01187 delay_cost_ptr, x_lookup) == 1) 01188 { 01189 num_accepted++; 01190 av += *cost_ptr; 01191 sum_of_squares += *cost_ptr * (*cost_ptr); 01192 } 01193 } 01194 01195 if(num_accepted != 0) 01196 av /= num_accepted; 01197 else 01198 av = 0.; 01199 01200 std_dev = get_std_dev(num_accepted, sum_of_squares, av); 01201 01202 #ifdef DEBUG 01203 if(num_accepted != move_lim) 01204 { 01205 printf 01206 ("Warning: Starting t: %d of %d configurations accepted.\n", 01207 num_accepted, move_lim); 01208 } 01209 #endif 01210 01211 #ifdef VERBOSE 01212 printf("std_dev: %g, average cost: %g, starting temp: %g\n", 01213 std_dev, av, 20. * std_dev); 01214 #endif 01215 01216 free(x_lookup); 01217 01218 /* Set the initial temperature to 20 times the standard of deviation */ 01219 /* so that the initial temperature adjusts according to the circuit */ 01220 return (20. * std_dev); 01221 } 01222 01223 /** Picks some block and moves it to another spot. If this spot is 01224 * occupied, switch the blocks. Assess the change in cost function 01225 * and accept or reject the move. If rejected, return 0. If 01226 * accepted return 1. Pass back the new value of the cost function. 01227 * rlim is the range limiter. 01228 */ 01229 static int 01230 try_swap(float t, 01231 float *cost, 01232 float *bb_cost, 01233 float *timing_cost, 01234 float rlim, 01235 int place_cost_type, 01236 float **old_region_occ_x, 01237 float **old_region_occ_y, 01238 int num_regions, 01239 boolean fixed_pins, 01240 enum e_place_algorithm place_algorithm, 01241 float timing_tradeoff, 01242 float inverse_prev_bb_cost, 01243 float inverse_prev_timing_cost, 01244 float *delay_cost, 01245 int *x_lookup) 01246 { 01247 01248 int b_from, x_to, y_to, z_to, x_from, y_from, z_from, b_to; 01249 int i, k, inet, keep_switch, num_of_pins, max_pins_per_clb; 01250 int num_nets_affected, bb_index; 01251 float delta_c, bb_delta_c, timing_delta_c, delay_delta_c, newcost; 01252 static struct s_bb *bb_coord_new = NULL; 01253 static struct s_bb *bb_edge_new = NULL; 01254 static int *nets_to_update = NULL, *net_block_moved = NULL; 01255 01256 max_pins_per_clb = 0; 01257 for(i = 0; i < num_types; i++) 01258 { 01259 max_pins_per_clb = 01260 max(max_pins_per_clb, type_descriptors[i].num_pins); 01261 } 01262 01263 /* Allocate the local bb_coordinate storage, etc. only once. */ 01264 01265 if(bb_coord_new == NULL) 01266 { 01267 bb_coord_new = (struct s_bb *)my_malloc(2 * max_pins_per_clb * 01268 sizeof(struct s_bb)); 01269 bb_edge_new = (struct s_bb *)my_malloc(2 * max_pins_per_clb * 01270 sizeof(struct s_bb)); 01271 nets_to_update = 01272 (int *)my_malloc(2 * max_pins_per_clb * sizeof(int)); 01273 net_block_moved = 01274 (int *)my_malloc(2 * max_pins_per_clb * sizeof(int)); 01275 } 01276 01277 01278 delay_delta_c = 0.0; 01279 b_from = my_irand(num_blocks - 1); 01280 01281 /* If the pins are fixed we never move them from their initial * 01282 * random locations. The code below could be made more efficient * 01283 * by using the fact that pins appear first in the block list, * 01284 * but this shouldn't cause any significant slowdown and won't be * 01285 * broken if I ever change the parser so that the pins aren't * 01286 * necessarily at the start of the block list. */ 01287 01288 if(fixed_pins == TRUE) 01289 { 01290 while(block[b_from].type == IO_TYPE) 01291 { 01292 b_from = my_irand(num_blocks - 1); 01293 } 01294 } 01295 01296 x_from = block[b_from].x; 01297 y_from = block[b_from].y; 01298 z_from = block[b_from].z; 01299 01300 if(!find_to 01301 (x_from, y_from, block[b_from].type, rlim, x_lookup, &x_to, &y_to)) 01302 return FALSE; 01303 01304 /* Make the switch in order to make computing the new bounding * 01305 * box simpler. If the cost increase is too high, switch them * 01306 * back. (block data structures switched, clbs not switched * 01307 * until success of move is determined.) */ 01308 01309 z_to = 0; 01310 if(grid[x_to][y_to].type->capacity > 1) 01311 { 01312 z_to = my_irand(grid[x_to][y_to].type->capacity - 1); 01313 } 01314 if(grid[x_to][y_to].blocks[z_to] == EMPTY) 01315 { /* Moving to an empty location */ 01316 b_to = EMPTY; 01317 block[b_from].x = x_to; 01318 block[b_from].y = y_to; 01319 block[b_from].z = z_to; 01320 } 01321 else 01322 { /* Swapping two blocks */ 01323 b_to = grid[x_to][y_to].blocks[z_to]; 01324 block[b_to].x = x_from; 01325 block[b_to].y = y_from; 01326 block[b_to].z = z_from; 01327 01328 block[b_from].x = x_to; 01329 block[b_from].y = y_to; 01330 block[b_from].z = z_to; 01331 } 01332 01333 /* Now update the cost function. May have to do major optimizations * 01334 * here later. */ 01335 01336 /* I'm using negative values of temp_net_cost as a flag, so DO NOT * 01337 * use cost functions that can go negative. */ 01338 01339 delta_c = 0; /* Change in cost due to this swap. */ 01340 bb_delta_c = 0; 01341 timing_delta_c = 0; 01342 01343 num_of_pins = block[b_from].type->num_pins; 01344 01345 num_nets_affected = find_affected_nets(nets_to_update, net_block_moved, 01346 b_from, b_to, num_of_pins); 01347 01348 if(place_cost_type == NONLINEAR_CONG) 01349 { 01350 save_region_occ(old_region_occ_x, old_region_occ_y, num_regions); 01351 } 01352 01353 bb_index = 0; /* Index of new bounding box. */ 01354 01355 for(k = 0; k < num_nets_affected; k++) 01356 { 01357 inet = nets_to_update[k]; 01358 01359 /* If we swapped two blocks connected to the same net, its bounding box * 01360 * doesn't change. */ 01361 01362 if(net_block_moved[k] == FROM_AND_TO) 01363 continue; 01364 01365 if(clb_net[inet].num_sinks < SMALL_NET) 01366 { 01367 get_non_updateable_bb(inet, &bb_coord_new[bb_index]); 01368 } 01369 else 01370 { 01371 /* TODO: Problem with making net update incremental, turn off for now */ 01372 get_non_updateable_bb(inet, &bb_coord_new[bb_index]); 01373 #if 0 01374 /* TODO: Problem with making net update incremental, turn off for now */ 01375 if(net_block_moved[k] == FROM) 01376 update_bb(inet, &bb_coord_new[bb_index], 01377 &bb_edge_new[bb_index], x_from, y_from, 01378 x_to, y_to); 01379 else 01380 update_bb(inet, &bb_coord_new[bb_index], 01381 &bb_edge_new[bb_index], x_to, y_to, x_from, 01382 y_from); 01383 #endif 01384 } 01385 01386 if(place_cost_type != NONLINEAR_CONG) 01387 { 01388 temp_net_cost[inet] = 01389 get_net_cost(inet, &bb_coord_new[bb_index]); 01390 bb_delta_c += temp_net_cost[inet] - net_cost[inet]; 01391 } 01392 else 01393 { 01394 /* Rip up, then replace with new bb. */ 01395 update_region_occ(inet, &bb_coords[inet], -1, 01396 num_regions); 01397 update_region_occ(inet, &bb_coord_new[bb_index], 1, 01398 num_regions); 01399 } 01400 01401 bb_index++; 01402 } 01403 01404 if(place_cost_type == NONLINEAR_CONG) 01405 { 01406 newcost = nonlinear_cong_cost(num_regions); 01407 bb_delta_c = newcost - *bb_cost; 01408 } 01409 01410 01411 if(place_algorithm == NET_TIMING_DRIVEN_PLACE || 01412 place_algorithm == PATH_TIMING_DRIVEN_PLACE) 01413 { 01414 /*in this case we redefine delta_c as a combination of timing and bb. * 01415 *additionally, we normalize all values, therefore delta_c is in * 01416 *relation to 1*/ 01417 01418 comp_delta_td_cost(b_from, b_to, num_of_pins, &timing_delta_c, 01419 &delay_delta_c); 01420 01421 delta_c = 01422 (1 - timing_tradeoff) * bb_delta_c * inverse_prev_bb_cost + 01423 timing_tradeoff * timing_delta_c * inverse_prev_timing_cost; 01424 } 01425 else 01426 { 01427 delta_c = bb_delta_c; 01428 } 01429 01430 01431 keep_switch = assess_swap(delta_c, t); 01432 01433 /* 1 -> move accepted, 0 -> rejected. */ 01434 01435 if(keep_switch) 01436 { 01437 *cost = *cost + delta_c; 01438 *bb_cost = *bb_cost + bb_delta_c; 01439 01440 01441 if(place_algorithm == NET_TIMING_DRIVEN_PLACE || 01442 place_algorithm == PATH_TIMING_DRIVEN_PLACE) 01443 { 01444 /*update the point_to_point_timing_cost and point_to_point_delay_cost 01445 * values from the temporary values */ 01446 *timing_cost = *timing_cost + timing_delta_c; 01447 *delay_cost = *delay_cost + delay_delta_c; 01448 01449 update_td_cost(b_from, b_to, num_of_pins); 01450 } 01451 01452 /* update net cost functions and reset flags. */ 01453 01454 bb_index = 0; 01455 01456 for(k = 0; k < num_nets_affected; k++) 01457 { 01458 inet = nets_to_update[k]; 01459 01460 /* If we swapped two blocks connected to the same net, its bounding box * 01461 * doesn't change. */ 01462 01463 if(net_block_moved[k] == FROM_AND_TO) 01464 { 01465 temp_net_cost[inet] = -1; 01466 continue; 01467 } 01468 01469 bb_coords[inet] = bb_coord_new[bb_index]; 01470 if(clb_net[inet].num_sinks >= SMALL_NET) 01471 bb_num_on_edges[inet] = bb_edge_new[bb_index]; 01472 01473 bb_index++; 01474 01475 net_cost[inet] = temp_net_cost[inet]; 01476 temp_net_cost[inet] = -1; 01477 } 01478 01479 /* Update clb data structures since we kept the move. */ 01480 /* Swap physical location */ 01481 grid[x_to][y_to].blocks[z_to] = b_from; 01482 grid[x_from][y_from].blocks[z_from] = b_to; 01483 01484 if(EMPTY == b_to) 01485 { /* Moved to an empty location */ 01486 grid[x_to][y_to].usage++; 01487 grid[x_from][y_from].usage--; 01488 } 01489 } 01490 01491 else 01492 { /* Move was rejected. */ 01493 01494 /* Reset the net cost function flags first. */ 01495 for(k = 0; k < num_nets_affected; k++) 01496 { 01497 inet = nets_to_update[k]; 01498 temp_net_cost[inet] = -1; 01499 } 01500 01501 /* Restore the block data structures to their state before the move. */ 01502 block[b_from].x = x_from; 01503 block[b_from].y = y_from; 01504 block[b_from].z = z_from; 01505 if(b_to != EMPTY) 01506 { 01507 block[b_to].x = x_to; 01508 block[b_to].y = y_to; 01509 block[b_to].z = z_to; 01510 } 01511 01512 /* Restore the region occupancies to their state before the move. */ 01513 if(place_cost_type == NONLINEAR_CONG) 01514 { 01515 restore_region_occ(old_region_occ_x, old_region_occ_y, 01516 num_regions); 01517 } 01518 } 01519 01520 return (keep_switch); 01521 } 01522 01523 01524 /** Saves the old occupancies of the placement subregions in case the 01525 * current move is not accepted. Used only for NONLINEAR_CONG. 01526 */ 01527 static void 01528 save_region_occ(float **old_region_occ_x, 01529 float **old_region_occ_y, 01530 int num_regions) 01531 { 01532 int i, j; 01533 01534 for(i = 0; i < num_regions; i++) 01535 { 01536 for(j = 0; j < num_regions; j++) 01537 { 01538 old_region_occ_x[i][j] = place_region_x[i][j].occupancy; 01539 old_region_occ_y[i][j] = place_region_y[i][j].occupancy; 01540 } 01541 } 01542 } 01543 01544 01545 /** Restores the old occupancies of the placement subregions when the 01546 * current move is not accepted. Used only for NONLINEAR_CONG. 01547 */ 01548 static void 01549 restore_region_occ(float **old_region_occ_x, 01550 float **old_region_occ_y, 01551 int num_regions) 01552 { 01553 01554 int i, j; 01555 01556 for(i = 0; i < num_regions; i++) 01557 { 01558 for(j = 0; j < num_regions; j++) 01559 { 01560 place_region_x[i][j].occupancy = old_region_occ_x[i][j]; 01561 place_region_y[i][j].occupancy = old_region_occ_y[i][j]; 01562 } 01563 } 01564 } 01565 01566 01567 /*( Puts a list of all the nets connected to b_from and b_to into 01568 * nets_to_update. Returns the number of affected nets. Net_block_moved 01569 * is either FROM, TO or FROM_AND_TO -- the block connected to this net 01570 * that has moved. 01571 */ 01572 static int 01573 find_affected_nets(int *nets_to_update, 01574 int *net_block_moved, 01575 int b_from, 01576 int b_to, 01577 int num_of_pins) 01578 { 01579 01580 int k, inet, affected_index, count; 01581 01582 affected_index = 0; 01583 01584 for(k = 0; k < num_of_pins; k++) 01585 { 01586 inet = block[b_from].nets[k]; 01587 01588 if(inet == OPEN) 01589 continue; 01590 01591 if(clb_net[inet].is_global) 01592 continue; 01593 01594 /* This is here in case the same block connects to a net twice. */ 01595 01596 if(temp_net_cost[inet] > 0.) 01597 continue; 01598 01599 nets_to_update[affected_index] = inet; 01600 net_block_moved[affected_index] = FROM; 01601 affected_index++; 01602 temp_net_cost[inet] = 1.; /* Flag to say we've marked this net. */ 01603 } 01604 01605 if(b_to != EMPTY) 01606 { 01607 for(k = 0; k < num_of_pins; k++) 01608 { 01609 inet = block[b_to].nets[k]; 01610 01611 if(inet == OPEN) 01612 continue; 01613 01614 if(clb_net[inet].is_global) 01615 continue; 01616 01617 if(temp_net_cost[inet] > 0.) 01618 { /* Net already marked. */ 01619 for(count = 0; count < affected_index; count++) 01620 { 01621 if(nets_to_update[count] == inet) 01622 { 01623 if(net_block_moved[count] == FROM) 01624 net_block_moved[count] = 01625 FROM_AND_TO; 01626 break; 01627 } 01628 } 01629 01630 #ifdef DEBUG 01631 if(count > affected_index) 01632 { 01633 printf 01634 ("Error in find_affected_nets -- count = %d," 01635 " affected index = %d.\n", count, 01636 affected_index); 01637 exit(1); 01638 } 01639 #endif 01640 } 01641 01642 else 01643 { /* Net not marked yet. */ 01644 01645 nets_to_update[affected_index] = inet; 01646 net_block_moved[affected_index] = TO; 01647 affected_index++; 01648 temp_net_cost[inet] = 1.; /* Flag means we've marked net. */ 01649 } 01650 } 01651 } 01652 01653 return (affected_index); 01654 } 01655 01656 01657 /** Returns the point to which I want to swap, properly range limited. 01658 * rlim must always be between 1 and nx (inclusive) for this routine 01659 * to work. Assumes that a column only contains blocks of the same type. 01660 */ 01661 static boolean 01662 find_to(int x_from, 01663 int y_from, 01664 t_type_ptr type, 01665 float rlim, 01666 int *x_lookup, 01667 int *x_to, 01668 int *y_to) 01669 { 01670 01671 int x_rel, y_rel, iside, iplace, rlx, rly, min_x, max_x, min_y, max_y; 01672 int num_col_same_type, i, j; 01673 01674 rlx = min(nx, rlim); /* Only needed when nx < ny. */ 01675 rly = min(ny, rlim); /* Added rly for aspect_ratio != 1 case. */ 01676 01677 min_x = max(1, x_from - rlx); 01678 max_x = min(nx, x_from + rlx); 01679 min_y = max(1, y_from - rly); 01680 max_y = min(ny, y_from + rly); 01681 01682 num_col_same_type = 0; 01683 j = 0; 01684 if(type != IO_TYPE) 01685 { 01686 for(i = min_x; i <= max_x; i++) 01687 { 01688 if(grid[i][1].type == type) 01689 { 01690 num_col_same_type++; 01691 x_lookup[j] = i; 01692 j++; 01693 } 01694 } 01695 assert(num_col_same_type != 0); 01696 if(num_col_same_type == 1 && 01697 ((((max_y - min_y) / type->height) - 1) <= 0 01698 || type->height > (ny / 2))) 01699 return FALSE; 01700 } 01701 01702 #ifdef DEBUG 01703 if(rlx < 1 || rlx > nx) 01704 { 01705 printf("Error in find_to: rlx = %d\n", rlx); 01706 exit(1); 01707 } 01708 #endif 01709 01710 do 01711 { /* Until (x_to, y_to) different from (x_from, y_from) */ 01712 if(type == IO_TYPE) 01713 { /* io_block to be moved. */ 01714 if(rlx >= nx) 01715 { 01716 iside = my_irand(3); 01717 /* * 01718 * +-----1----+ * 01719 * | | * 01720 * | | * 01721 * 0 2 * 01722 * | | * 01723 * | | * 01724 * +-----3----+ * 01725 * */ 01726 switch (iside) 01727 { 01728 case 0: 01729 iplace = my_irand(ny - 1) + 1; 01730 *x_to = 0; 01731 *y_to = iplace; 01732 break; 01733 case 1: 01734 iplace = my_irand(nx - 1) + 1; 01735 *x_to = iplace; 01736 *y_to = ny + 1; 01737 break; 01738 case 2: 01739 iplace = my_irand(ny - 1) + 1; 01740 *x_to = nx + 1; 01741 *y_to = iplace; 01742 break; 01743 case 3: 01744 iplace = my_irand(nx - 1) + 1; 01745 *x_to = iplace; 01746 *y_to = 0; 01747 break; 01748 default: 01749 printf 01750 ("Error in find_to. Unexpected io swap location.\n"); 01751 exit(1); 01752 } 01753 } 01754 else 01755 { /* rlx is less than whole chip */ 01756 if(x_from == 0) 01757 { 01758 iplace = my_irand(2 * rly); 01759 *y_to = y_from - rly + iplace; 01760 *x_to = x_from; 01761 if(*y_to > ny) 01762 { 01763 *y_to = ny + 1; 01764 *x_to = my_irand(rlx - 1) + 1; 01765 } 01766 else if(*y_to < 1) 01767 { 01768 *y_to = 0; 01769 *x_to = my_irand(rlx - 1) + 1; 01770 } 01771 } 01772 else if(x_from == nx + 1) 01773 { 01774 iplace = my_irand(2 * rly); 01775 *y_to = y_from - rly + iplace; 01776 *x_to = x_from; 01777 if(*y_to > ny) 01778 { 01779 *y_to = ny + 1; 01780 *x_to = nx - my_irand(rlx - 1); 01781 } 01782 else if(*y_to < 1) 01783 { 01784 *y_to = 0; 01785 *x_to = nx - my_irand(rlx - 1); 01786 } 01787 } 01788 else if(y_from == 0) 01789 { 01790 iplace = my_irand(2 * rlx); 01791 *x_to = x_from - rlx + iplace; 01792 *y_to = y_from; 01793 if(*x_to > nx) 01794 { 01795 *x_to = nx + 1; 01796 *y_to = my_irand(rly - 1) + 1; 01797 } 01798 else if(*x_to < 1) 01799 { 01800 *x_to = 0; 01801 *y_to = my_irand(rly - 1) + 1; 01802 } 01803 } 01804 else 01805 { /* *y_from == ny + 1 */ 01806 iplace = my_irand(2 * rlx); 01807 *x_to = x_from - rlx + iplace; 01808 *y_to = y_from; 01809 if(*x_to > nx) 01810 { 01811 *x_to = nx + 1; 01812 *y_to = ny - my_irand(rly - 1); 01813 } 01814 else if(*x_to < 1) 01815 { 01816 *x_to = 0; 01817 *y_to = ny - my_irand(rly - 1); 01818 } 01819 } 01820 } /* End rlx if */ 01821 } /* end type if */ 01822 else 01823 { 01824 if(nx == 1 && ny == 1) { 01825 return FALSE; 01826 } 01827 x_rel = my_irand(num_col_same_type - 1); 01828 y_rel = 01829 my_irand(max 01830 (0, ((max_y - min_y) / type->height) - 1)); 01831 *x_to = x_lookup[x_rel]; 01832 *y_to = min_y + y_rel * type->height; 01833 *y_to = (*y_to) - grid[*x_to][*y_to].offset; /* align it */ 01834 assert(*x_to >= 1 && *x_to <= nx); 01835 assert(*y_to >= 1 && *y_to <= ny); 01836 } 01837 } 01838 while((x_from == *x_to) && (y_from == *y_to)); 01839 01840 #ifdef DEBUG 01841 if(*x_to < 0 || *x_to > nx + 1 || *y_to < 0 || *y_to > ny + 1) 01842 { 01843 printf("Error in routine find_to: (x_to,y_to) = (%d,%d)\n", 01844 *x_to, *y_to); 01845 exit(1); 01846 } 01847 #endif 01848 assert(type == grid[*x_to][*y_to].type); 01849 return TRUE; 01850 } 01851 01852 01853 /** Returns: 1 -> move accepted, 0 -> rejected. */ 01854 static int 01855 assess_swap(float delta_c, 01856 float t) 01857 { 01858 int accept; 01859 float prob_fac, fnum; 01860 01861 if(delta_c <= 0) 01862 { 01863 01864 #ifdef SPEC /* Reduce variation in final solution due to round off */ 01865 fnum = my_frand(); 01866 #endif 01867 01868 accept = 1; 01869 return (accept); 01870 } 01871 01872 if(t == 0.) 01873 return (0); 01874 01875 fnum = my_frand(); 01876 prob_fac = exp(-delta_c / t); 01877 if(prob_fac > fnum) 01878 { 01879 accept = 1; 01880 } 01881 else 01882 { 01883 accept = 0; 01884 } 01885 return (accept); 01886 } 01887 01888 01889 /** Recomputes the cost to eliminate roundoff that may have accrued. 01890 * This routine does as little work as possible to compute this new 01891 * cost. 01892 */ 01893 static float 01894 recompute_bb_cost(int place_cost_type, 01895 int num_regions) 01896 { 01897 01898 int i, j, inet; 01899 float cost; 01900 01901 cost = 0; 01902 01903 /* Initialize occupancies to zero if regions are being used. */ 01904 01905 if(place_cost_type == NONLINEAR_CONG) 01906 { 01907 for(i = 0; i < num_regions; i++) 01908 { 01909 for(j = 0; j < num_regions; j++) 01910 { 01911 place_region_x[i][j].occupancy = 0.; 01912 place_region_y[i][j].occupancy = 0.; 01913 } 01914 } 01915 } 01916 01917 for(inet = 0; inet < num_nets; inet++) 01918 { /* for each net ... */ 01919 01920 if(clb_net[inet].is_global == FALSE) 01921 { /* Do only if not global. */ 01922 01923 /* Bounding boxes don't have to be recomputed; they're correct. */ 01924 01925 if(place_cost_type != NONLINEAR_CONG) 01926 { 01927 cost += net_cost[inet]; 01928 } 01929 else 01930 { /* Must be nonlinear_cong case. */ 01931 update_region_occ(inet, &bb_coords[inet], 1, 01932 num_regions); 01933 } 01934 } 01935 } 01936 01937 if(place_cost_type == NONLINEAR_CONG) 01938 { 01939 cost = nonlinear_cong_cost(num_regions); 01940 } 01941 01942 return (cost); 01943 } 01944 01945 01946 /* *returns the delay of one point to point connection */ 01947 static float 01948 comp_td_point_to_point_delay(int inet, 01949 int ipin) 01950 { 01951 int source_block, sink_block; 01952 int delta_x, delta_y; 01953 t_type_ptr source_type, sink_type; 01954 float delay_source_to_sink; 01955 01956 delay_source_to_sink = 0.; 01957 01958 source_block = clb_net[inet].node_block[0]; 01959 source_type = block[source_block].type; 01960 01961 sink_block = clb_net[inet].node_block[ipin]; 01962 sink_type = block[sink_block].type; 01963 01964 assert(source_type != NULL); 01965 assert(sink_type != NULL); 01966 01967 delta_x = abs(block[sink_block].x - block[source_block].x); 01968 delta_y = abs(block[sink_block].y - block[source_block].y); 01969 01970 /* TODO low priority: Could be merged into one look-up table */ 01971 /* Note: This heuristic is terrible on Quality of Results. 01972 * A much better heuristic is to create a more comprehensive lookup table but 01973 * it's too late in the release cycle to do this. Pushing until the next release */ 01974 if(source_type == IO_TYPE) 01975 { 01976 if(sink_type == IO_TYPE) 01977 delay_source_to_sink = delta_io_to_io[delta_x][delta_y]; 01978 else 01979 delay_source_to_sink = delta_io_to_clb[delta_x][delta_y]; 01980 } 01981 else 01982 { 01983 if(sink_type == IO_TYPE) 01984 delay_source_to_sink = delta_clb_to_io[delta_x][delta_y]; 01985 else 01986 delay_source_to_sink = delta_clb_to_clb[delta_x][delta_y]; 01987 } 01988 if(delay_source_to_sink < 0) 01989 { 01990 printf 01991 ("Error in comp_td_point_to_point_delay in place.c, bad delay_source_to_sink value\n"); 01992 exit(1); 01993 } 01994 01995 if(delay_source_to_sink < 0.) 01996 { 01997 printf 01998 ("Error in comp_td_point_to_point_delay in place.c, delay is less than 0\n"); 01999 exit(1); 02000 } 02001 02002 return (delay_source_to_sink); 02003 } 02004 02005 02006 02007 /** update the point_to_point_timing_cost values from the temporary 02008 * values for all connections that have changed 02009 */ 02010 static void 02011 update_td_cost(int b_from, 02012 int b_to, 02013 int num_of_pins) 02014 { 02015 int blkpin, net_pin, inet, ipin; 02016 02017 for(blkpin = 0; blkpin < num_of_pins; blkpin++) 02018 { 02019 02020 inet = block[b_from].nets[blkpin]; 02021 02022 if(inet == OPEN) 02023 continue; 02024 02025 if(clb_net[inet].is_global) 02026 continue; 02027 02028 net_pin = net_pin_index[b_from][blkpin]; 02029 02030 if(net_pin != 0) 02031 { 02032 02033 /*the following "if" prevents the value from being updated twice */ 02034 if(clb_net[inet].node_block[0] != b_to 02035 && clb_net[inet].node_block[0] != b_from) 02036 { 02037 02038 point_to_point_delay_cost[inet][net_pin] = 02039 temp_point_to_point_delay_cost[inet][net_pin]; 02040 temp_point_to_point_delay_cost[inet][net_pin] = 02041 -1; 02042 02043 point_to_point_timing_cost[inet][net_pin] = 02044 temp_point_to_point_timing_cost[inet] 02045 [net_pin]; 02046 temp_point_to_point_timing_cost[inet][net_pin] = 02047 -1; 02048 } 02049 } 02050 else 02051 { /*this net is being driven by a moved block, recompute */ 02052 /*all point to point connections on this net. */ 02053 for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) 02054 { 02055 02056 point_to_point_delay_cost[inet][ipin] = 02057 temp_point_to_point_delay_cost[inet][ipin]; 02058 temp_point_to_point_delay_cost[inet][ipin] = -1; 02059 02060 point_to_point_timing_cost[inet][ipin] = 02061 temp_point_to_point_timing_cost[inet][ipin]; 02062 temp_point_to_point_timing_cost[inet][ipin] = -1; 02063 } 02064 } 02065 } 02066 02067 if(b_to != EMPTY) 02068 { 02069 for(blkpin = 0; blkpin < num_of_pins; blkpin++) 02070 { 02071 02072 inet = block[b_to].nets[blkpin]; 02073 02074 if(inet == OPEN) 02075 continue; 02076 02077 if(clb_net[inet].is_global) 02078 continue; 02079 02080 net_pin = net_pin_index[b_to][blkpin]; 02081 02082 if(net_pin != 0) 02083 { 02084 02085 /*the following "if" prevents the value from being updated 2x */ 02086 if(clb_net[inet].node_block[0] != b_to 02087 && clb_net[inet].node_block[0] != b_from) 02088 { 02089 02090 point_to_point_delay_cost[inet][net_pin] = 02091 temp_point_to_point_delay_cost[inet] 02092 [net_pin]; 02093 temp_point_to_point_delay_cost[inet] 02094 [net_pin] = -1; 02095 02096 point_to_point_timing_cost[inet][net_pin] 02097 = 02098 temp_point_to_point_timing_cost[inet] 02099 [net_pin]; 02100 temp_point_to_point_timing_cost[inet] 02101 [net_pin] = -1; 02102 } 02103 } 02104 else 02105 { /*this net is being driven by a moved block, recompute */ 02106 /*all point to point connections on this net. */ 02107 for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) 02108 { 02109 02110 point_to_point_delay_cost[inet][ipin] = 02111 temp_point_to_point_delay_cost[inet] 02112 [ipin]; 02113 temp_point_to_point_delay_cost[inet][ipin] 02114 = -1; 02115 02116 point_to_point_timing_cost[inet][ipin] = 02117 temp_point_to_point_timing_cost[inet] 02118 [ipin]; 02119 temp_point_to_point_timing_cost[inet] 02120 [ipin] = -1; 02121 } 02122 } 02123 } 02124 } 02125 } 02126 02127 /** a net that is being driven by a moved block must have all of its 02128 * sink timing costs recomputed. A net that is driving a moved block 02129 * must only have the timing cost on the connection driving the input 02130 * pin computed 02131 */ 02132 static void 02133 comp_delta_td_cost(int b_from, 02134 int b_to, 02135 int num_of_pins, 02136 float *delta_timing, 02137 float *delta_delay) 02138 { 02139 02140 int inet, k, net_pin, ipin; 02141 float delta_timing_cost, delta_delay_cost, temp_delay; 02142 02143 delta_timing_cost = 0.; 02144 delta_delay_cost = 0.; 02145 02146 02147 for(k = 0; k < num_of_pins; k++) 02148 { 02149 inet = block[b_from].nets[k]; 02150 02151 if(inet == OPEN) 02152 continue; 02153 02154 if(clb_net[inet].is_global) 02155 continue; 02156 02157 net_pin = net_pin_index[b_from][k]; 02158 02159 if(net_pin != 0) 02160 { /*this net is driving a moved block */ 02161 02162 /*if this net is being driven by a block that has moved, we do not */ 02163 /*need to compute the change in the timing cost (here) since it will */ 02164 /*be computed in the fanout of the net on the driving block, also */ 02165 /*computing it here would double count the change, and mess up the */ 02166 /*delta_timing_cost value */ 02167 if(clb_net[inet].node_block[0] != b_to 02168 && clb_net[inet].node_block[0] != b_from) 02169 { 02170 temp_delay = 02171 comp_td_point_to_point_delay(inet, net_pin); 02172 02173 temp_point_to_point_delay_cost[inet][net_pin] = 02174 temp_delay; 02175 temp_point_to_point_timing_cost[inet][net_pin] = 02176 timing_place_crit[inet][net_pin] * temp_delay; 02177 02178 delta_delay_cost += 02179 temp_point_to_point_delay_cost[inet][net_pin] 02180 - point_to_point_delay_cost[inet][net_pin]; 02181 02182 delta_timing_cost += 02183 temp_point_to_point_timing_cost[inet][net_pin] 02184 - point_to_point_timing_cost[inet][net_pin]; 02185 } 02186 } 02187 else 02188 { /*this net is being driven by a moved block, recompute */ 02189 /*all point to point connections on this net. */ 02190 for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) 02191 { 02192 temp_delay = 02193 comp_td_point_to_point_delay(inet, ipin); 02194 02195 temp_point_to_point_delay_cost[inet][ipin] = 02196 temp_delay; 02197 temp_point_to_point_timing_cost[inet][ipin] = 02198 timing_place_crit[inet][ipin] * temp_delay; 02199 02200 delta_delay_cost += 02201 temp_point_to_point_delay_cost[inet][ipin] - 02202 point_to_point_delay_cost[inet][ipin]; 02203 02204 delta_timing_cost += 02205 temp_point_to_point_timing_cost[inet][ipin] - 02206 point_to_point_timing_cost[inet][ipin]; 02207 } 02208 } 02209 } 02210 02211 if(b_to != EMPTY) 02212 { 02213 for(k = 0; k < num_of_pins; k++) 02214 { 02215 inet = block[b_to].nets[k]; 02216 02217 if(inet == OPEN) 02218 continue; 02219 02220 if(clb_net[inet].is_global) 02221 continue; 02222 02223 net_pin = net_pin_index[b_to][k]; 02224 02225 if(net_pin != 0) 02226 { /*this net is driving a moved block */ 02227 02228 /*if this net is being driven by a block that has moved, we do not */ 02229 /*need to compute the change in the timing cost (here) since it was */ 02230 /*computed in the fanout of the net on the driving block, also */ 02231 /*computing it here would double count the change, and mess up the */ 02232 /*delta_timing_cost value */ 02233 if(clb_net[inet].node_block[0] != b_to 02234 && clb_net[inet].node_block[0] != b_from) 02235 { 02236 temp_delay = 02237 comp_td_point_to_point_delay(inet, 02238 net_pin); 02239 02240 temp_point_to_point_delay_cost[inet] 02241 [net_pin] = temp_delay; 02242 temp_point_to_point_timing_cost[inet] 02243 [net_pin] = 02244 timing_place_crit[inet][net_pin] * 02245 temp_delay; 02246 02247 delta_delay_cost += 02248 temp_point_to_point_delay_cost[inet] 02249 [net_pin] - 02250 point_to_point_delay_cost[inet] 02251 [net_pin]; 02252 delta_timing_cost += 02253 temp_point_to_point_timing_cost[inet] 02254 [net_pin] - 02255 point_to_point_timing_cost[inet] 02256 [net_pin]; 02257 } 02258 } 02259 else 02260 { /*this net is being driven by a moved block, recompute */ 02261 /*all point to point connections on this net. */ 02262 for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) 02263 { 02264 02265 temp_delay = 02266 comp_td_point_to_point_delay(inet, 02267 ipin); 02268 02269 temp_point_to_point_delay_cost[inet][ipin] 02270 = temp_delay; 02271 temp_point_to_point_timing_cost[inet] 02272 [ipin] = 02273 timing_place_crit[inet][ipin] * 02274 temp_delay; 02275 02276 02277 delta_delay_cost += 02278 temp_point_to_point_delay_cost[inet] 02279 [ipin] - 02280 point_to_point_delay_cost[inet][ipin]; 02281 delta_timing_cost += 02282 temp_point_to_point_timing_cost[inet] 02283 [ipin] - 02284 point_to_point_timing_cost[inet] 02285 [ipin]; 02286 } 02287 } 02288 } 02289 } 02290 02291 *delta_timing = delta_timing_cost; 02292 *delta_delay = delta_delay_cost; 02293 } 02294 02295 /* computes the cost (from scratch) due to the delays and criticalities 02296 * on all point to point connections, we define the timing cost of 02297 * each connection as criticality*delay 02298 */ 02299 static void 02300 comp_td_costs(float *timing_cost, 02301 float *connection_delay_sum) 02302 { 02303 int inet, ipin; 02304 float loc_timing_cost, loc_connection_delay_sum, temp_delay_cost, 02305 temp_timing_cost; 02306 02307 loc_timing_cost = 0.; 02308 loc_connection_delay_sum = 0.; 02309 02310 for(inet = 0; inet < num_nets; inet++) 02311 { /* for each net ... */ 02312 if(clb_net[inet].is_global == FALSE) 02313 { /* Do only if not global. */ 02314 02315 for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) 02316 { 02317 02318 temp_delay_cost = 02319 comp_td_point_to_point_delay(inet, ipin); 02320 temp_timing_cost = 02321 temp_delay_cost * 02322 timing_place_crit[inet][ipin]; 02323 02324 loc_connection_delay_sum += temp_delay_cost; 02325 point_to_point_delay_cost[inet][ipin] = 02326 temp_delay_cost; 02327 temp_point_to_point_delay_cost[inet][ipin] = -1; /*undefined */ 02328 02329 point_to_point_timing_cost[inet][ipin] = 02330 temp_timing_cost; 02331 temp_point_to_point_timing_cost[inet][ipin] = -1; /*undefined */ 02332 loc_timing_cost += temp_timing_cost; 02333 } 02334 } 02335 } 02336 *timing_cost = loc_timing_cost; 02337 *connection_delay_sum = loc_connection_delay_sum; 02338 } 02339 02340 /** Finds the cost from scratch. Done only when the placement 02341 * has been radically changed (i.e. after initial placement). 02342 * Otherwise find the cost change incrementally. If method 02343 * check is NORMAL, we find bounding boxes that are updateable 02344 * for the larger nets. If method is CHECK, all bounding boxes 02345 * are found via the non_updateable_bb routine, to provide a 02346 * cost which can be used to check the correctness of the 02347 * other routine. 02348 */ 02349 static float 02350 comp_bb_cost(int method, 02351 int place_cost_type, 02352 int num_regions) 02353 { 02354 02355 int i, j, k; 02356 float cost; 02357 double expected_wirelength; 02358 02359 cost = 0; 02360 expected_wirelength = 0.0; 02361 02362 /* Initialize occupancies to zero if regions are being used. */ 02363 02364 if(place_cost_type == NONLINEAR_CONG) 02365 { 02366 for(i = 0; i < num_regions; i++) 02367 { 02368 for(j = 0; j < num_regions; j++) 02369 { 02370 place_region_x[i][j].occupancy = 0.; 02371 place_region_y[i][j].occupancy = 0.; 02372 } 02373 } 02374 } 02375 02376 for(k = 0; k < num_nets; k++) 02377 { /* for each net ... */ 02378 02379 if(clb_net[k].is_global == FALSE) 02380 { /* Do only if not global. */ 02381 02382 /* Small nets don't use incremental updating on their bounding boxes, * 02383 * so they can use a fast bounding box calculator. */ 02384 02385 if(clb_net[k].num_sinks >= SMALL_NET && method == NORMAL) 02386 { 02387 get_non_updateable_bb(k, &bb_coords[k]); 02388 #if 0 02389 get_bb_from_scratch(k, &bb_coords[k], 02390 &bb_num_on_edges[k]); 02391 #endif 02392 } 02393 else 02394 { 02395 get_non_updateable_bb(k, &bb_coords[k]); 02396 } 02397 02398 if(place_cost_type != NONLINEAR_CONG) 02399 { 02400 net_cost[k] = get_net_cost(k, &bb_coords[k]); 02401 cost += net_cost[k]; 02402 if(method == CHECK) 02403 expected_wirelength += 02404 get_net_wirelength_estimate(k, 02405 &bb_coords 02406 [k]); 02407 } 02408 else 02409 { /* Must be nonlinear_cong case. */ 02410 update_region_occ(k, &bb_coords[k], 1, 02411 num_regions); 02412 } 02413 } 02414 } 02415 02416 if(place_cost_type == NONLINEAR_CONG) 02417 { 02418 cost = nonlinear_cong_cost(num_regions); 02419 } 02420 02421 if(method == CHECK) 02422 printf("BB estimate of min-dist (placement) wirelength is ;%.0f\n", 02423 expected_wirelength); 02424 02425 return (cost); 02426 } 02427 02428 /** This routine computes the cost of a placement when the NONLINEAR_CONG 02429 * option is selected. It assumes that the occupancies of all the 02430 * placement subregions have been properly updated, and simply 02431 * computes the cost due to these occupancies by summing over all 02432 * subregions. This will be inefficient for moves that don't affect 02433 * many subregions (i.e. small moves late in placement), esp. when there 02434 * are a lot of subregions. May recode later to update only affected 02435 * subregions. 02436 */ 02437 static float 02438 nonlinear_cong_cost(int num_regions) 02439 { 02440 02441 float cost, tmp; 02442 int i, j; 02443 02444 cost = 0.; 02445 02446 for(i = 0; i < num_regions; i++) 02447 { 02448 for(j = 0; j < num_regions; j++) 02449 { 02450 02451 /* Many different cost metrics possible. 1st try: */ 02452 02453 if(place_region_x[i][j].occupancy < 02454 place_region_x[i][j].capacity) 02455 { 02456 cost += place_region_x[i][j].occupancy * 02457 place_region_x[i][j].inv_capacity; 02458 } 02459 else 02460 { /* Overused region -- penalize. */ 02461 02462 tmp = place_region_x[i][j].occupancy * 02463 place_region_x[i][j].inv_capacity; 02464 cost += tmp * tmp; 02465 } 02466 02467 if(place_region_y[i][j].occupancy < 02468 place_region_y[i][j].capacity) 02469 { 02470 cost += place_region_y[i][j].occupancy * 02471 place_region_y[i][j].inv_capacity; 02472 } 02473 else 02474 { /* Overused region -- penalize. */ 02475 02476 tmp = place_region_y[i][j].occupancy * 02477 place_region_y[i][j].inv_capacity; 02478 cost += tmp * tmp; 02479 } 02480 02481 } 02482 } 02483 02484 return (cost); 02485 } 02486 02487 02488 /** Called only when the place_cost_type is NONLINEAR_CONG. If add_or_sub 02489 * is 1, this uses the new net bounding box to increase the occupancy 02490 * of some regions. If add_or_sub = - 1, it decreases the occupancy 02491 * by that due to this bounding box. 02492 */ 02493 static void 02494 update_region_occ(int inet, 02495 struct s_bb *coords, 02496 int add_or_sub, 02497 int num_regions) 02498 { 02499 02500 float net_xmin, net_xmax, net_ymin, net_ymax, crossing; 02501 float inv_region_len, inv_region_height; 02502 float inv_bb_len, inv_bb_height; 02503 float overlap_xlow, overlap_xhigh, overlap_ylow, overlap_yhigh; 02504 float y_overlap, x_overlap, x_occupancy, y_occupancy; 02505 int imin, imax, jmin, jmax, i, j; 02506 02507 if(clb_net[inet].num_sinks >= 50) 02508 { 02509 crossing = 2.7933 + 0.02616 * ((clb_net[inet].num_sinks + 1) - 50); 02510 } 02511 else 02512 { 02513 crossing = cross_count[clb_net[inet].num_sinks]; 02514 } 02515 02516 net_xmin = coords->xmin - 0.5; 02517 net_xmax = coords->xmax + 0.5; 02518 net_ymin = coords->ymin - 0.5; 02519 net_ymax = coords->ymax + 0.5; 02520 02521 /* I could precompute the two values below. Should consider this. */ 02522 02523 inv_region_len = (float)num_regions / (float)nx; 02524 inv_region_height = (float)num_regions / (float)ny; 02525 02526 /* Get integer coordinates defining the rectangular area in which the * 02527 * subregions have to be updated. Formula is as follows: subtract * 02528 * 0.5 from net_xmin, etc. to get numbers from 0 to nx or ny; * 02529 * divide by nx or ny to scale between 0 and 1; multiply by * 02530 * num_regions to scale between 0 and num_regions; and truncate to * 02531 * get the final answer. */ 02532 02533 imin = (int)(net_xmin - 0.5) * inv_region_len; 02534 imax = (int)(net_xmax - 0.5) * inv_region_len; 02535 imax = min(imax, num_regions - 1); /* Watch for weird roundoff */ 02536 02537 jmin = (int)(net_ymin - 0.5) * inv_region_height; 02538 jmax = (int)(net_ymax - 0.5) * inv_region_height; 02539 jmax = min(jmax, num_regions - 1); /* Watch for weird roundoff */ 02540 02541 inv_bb_len = 1. / (net_xmax - net_xmin); 02542 inv_bb_height = 1. / (net_ymax - net_ymin); 02543 02544 /* See RISA paper (ICCAD '94, pp. 690 - 695) for a description of why * 02545 * I use exactly this cost function. */ 02546 02547 for(i = imin; i <= imax; i++) 02548 { 02549 for(j = jmin; j <= jmax; j++) 02550 { 02551 overlap_xlow = max(place_region_bounds_x[i], net_xmin); 02552 overlap_xhigh = 02553 min(place_region_bounds_x[i + 1], net_xmax); 02554 overlap_ylow = max(place_region_bounds_y[j], net_ymin); 02555 overlap_yhigh = 02556 min(place_region_bounds_y[j + 1], net_ymax); 02557 02558 x_overlap = overlap_xhigh - overlap_xlow; 02559 y_overlap = overlap_yhigh - overlap_ylow; 02560 02561 #ifdef DEBUG 02562 02563 if(x_overlap < -0.001) 02564 { 02565 printf 02566 ("Error in update_region_occ: x_overlap < 0" 02567 "\n inet = %d, overlap = %g\n", inet, 02568 x_overlap); 02569 } 02570 02571 if(y_overlap < -0.001) 02572 { 02573 printf 02574 ("Error in update_region_occ: y_overlap < 0" 02575 "\n inet = %d, overlap = %g\n", inet, 02576 y_overlap); 02577 } 02578 #endif 02579 02580 02581 x_occupancy = 02582 crossing * y_overlap * x_overlap * inv_bb_height * 02583 inv_region_len; 02584 y_occupancy = 02585 crossing * x_overlap * y_overlap * inv_bb_len * 02586 inv_region_height; 02587 02588 place_region_x[i][j].occupancy += 02589 add_or_sub * x_occupancy; 02590 place_region_y[i][j].occupancy += 02591 add_or_sub * y_occupancy; 02592 } 02593 } 02594 02595 } 02596 02597 02598 /** Frees the place_regions data structures needed by the NONLINEAR_CONG 02599 * cost function. 02600 */ 02601 static void 02602 free_place_regions(int num_regions) 02603 { 02604 02605 free_matrix(place_region_x, 0, num_regions - 1, 0, sizeof(struct 02606 s_place_region)); 02607 02608 free_matrix(place_region_y, 0, num_regions - 1, 0, sizeof(struct 02609 s_place_region)); 02610 02611 free(place_region_bounds_x); 02612 free(place_region_bounds_y); 02613 } 02614 02615 02616 /** Frees the major structures needed by the placer (and not needed 02617 * elsewhere). 02618 */ 02619 static void 02620 free_placement_structs(int place_cost_type, 02621 int num_regions, 02622 float **old_region_occ_x, 02623 float **old_region_occ_y, 02624 struct s_placer_opts placer_opts) 02625 { 02626 int inet; 02627 02628 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 02629 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || 02630 placer_opts.enable_timing_computations) 02631 { 02632 for(inet = 0; inet < num_nets; inet++) 02633 { 02634 /*add one to the address since it is indexed from 1 not 0 */ 02635 02636 point_to_point_delay_cost[inet]++; 02637 free(point_to_point_delay_cost[inet]); 02638 02639 point_to_point_timing_cost[inet]++; 02640 free(point_to_point_timing_cost[inet]); 02641 02642 temp_point_to_point_delay_cost[inet]++; 02643 free(temp_point_to_point_delay_cost[inet]); 02644 02645 temp_point_to_point_timing_cost[inet]++; 02646 free(temp_point_to_point_timing_cost[inet]); 02647 } 02648 free(point_to_point_delay_cost); 02649 free(temp_point_to_point_delay_cost); 02650 02651 free(point_to_point_timing_cost); 02652 free(temp_point_to_point_timing_cost); 02653 02654 free_matrix(net_pin_index, 0, num_blocks - 1, 0, sizeof(int)); 02655 } 02656 02657 02658 free(net_cost); 02659 free(temp_net_cost); 02660 free(bb_num_on_edges); 02661 free(bb_coords); 02662 02663 net_cost = NULL; /* Defensive coding. */ 02664 temp_net_cost = NULL; 02665 bb_num_on_edges = NULL; 02666 bb_coords = NULL; 02667 02668 if(place_cost_type == NONLINEAR_CONG) 02669 { 02670 free_place_regions(num_regions); 02671 free_matrix(old_region_occ_x, 0, num_regions - 1, 0, 02672 sizeof(float)); 02673 free_matrix(old_region_occ_y, 0, num_regions - 1, 0, 02674 sizeof(float)); 02675 } 02676 02677 else if(place_cost_type == LINEAR_CONG) 02678 { 02679 free_fast_cost_update_structs(); 02680 } 02681 } 02682 02683 02684 /** Allocates the major structures needed only by the placer, primarily for 02685 * computing costs quickly and such. 02686 */ 02687 static void 02688 alloc_and_load_placement_structs(int place_cost_type, 02689 int num_regions, 02690 float place_cost_exp, 02691 float ***old_region_occ_x, 02692 float ***old_region_occ_y, 02693 struct s_placer_opts placer_opts) 02694 { 02695 02696 int inet, ipin, max_pins_per_clb, i; 02697 02698 max_pins_per_clb = 0; 02699 for(i = 0; i < num_types; i++) 02700 { 02701 max_pins_per_clb = 02702 max(max_pins_per_clb, type_descriptors[i].num_pins); 02703 } 02704 02705 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 02706 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || 02707 placer_opts.enable_timing_computations) 02708 { 02709 /*allocate structures associated with timing driven placement */ 02710 /* [0..num_nets-1][1..num_pins-1] */ 02711 point_to_point_delay_cost = 02712 (float **)my_malloc(num_nets * sizeof(float *)); 02713 temp_point_to_point_delay_cost = 02714 (float **)my_malloc(num_nets * sizeof(float *)); 02715 02716 point_to_point_timing_cost = 02717 (float **)my_malloc(num_nets * sizeof(float *)); 02718 temp_point_to_point_timing_cost = 02719 (float **)my_malloc(num_nets * sizeof(float *)); 02720 02721 net_pin_index = 02722 (int **)alloc_matrix(0, num_blocks - 1, 0, 02723 max_pins_per_clb - 1, sizeof(int)); 02724 02725 for(inet = 0; inet < num_nets; inet++) 02726 { 02727 02728 /* in the following, subract one so index starts at * 02729 * 1 instead of 0 */ 02730 point_to_point_delay_cost[inet] = 02731 (float *)my_malloc(clb_net[inet].num_sinks * 02732 sizeof(float)); 02733 point_to_point_delay_cost[inet]--; 02734 02735 temp_point_to_point_delay_cost[inet] = 02736 (float *)my_malloc(clb_net[inet].num_sinks * 02737 sizeof(float)); 02738 temp_point_to_point_delay_cost[inet]--; 02739 02740 point_to_point_timing_cost[inet] = 02741 (float *)my_malloc(clb_net[inet].num_sinks * 02742 sizeof(float)); 02743 point_to_point_timing_cost[inet]--; 02744 02745 temp_point_to_point_timing_cost[inet] = 02746 (float *)my_malloc(clb_net[inet].num_sinks * 02747 sizeof(float)); 02748 temp_point_to_point_timing_cost[inet]--; 02749 } 02750 for(inet = 0; inet < num_nets; inet++) 02751 { 02752 for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) 02753 { 02754 point_to_point_delay_cost[inet][ipin] = 0; 02755 temp_point_to_point_delay_cost[inet][ipin] = 0; 02756 } 02757 } 02758 } 02759 02760 02761 02762 02763 02764 net_cost = (float *)my_malloc(num_nets * sizeof(float)); 02765 temp_net_cost = (float *)my_malloc(num_nets * sizeof(float)); 02766 02767 /* Used to store costs for moves not yet made and to indicate when a net's * 02768 * cost has been recomputed. temp_net_cost[inet] < 0 means net's cost hasn't * 02769 * been recomputed. */ 02770 02771 for(inet = 0; inet < num_nets; inet++) 02772 temp_net_cost[inet] = -1.; 02773 02774 bb_coords = (struct s_bb *)my_malloc(num_nets * sizeof(struct s_bb)); 02775 bb_num_on_edges = 02776 (struct s_bb *)my_malloc(num_nets * sizeof(struct s_bb)); 02777 02778 /* Allocate storage for subregion data, if needed. */ 02779 02780 if(place_cost_type == NONLINEAR_CONG) 02781 { 02782 alloc_place_regions(num_regions); 02783 load_place_regions(num_regions); 02784 *old_region_occ_x = (float **)alloc_matrix(0, num_regions - 1, 0, 02785 num_regions - 1, 02786 sizeof(float)); 02787 *old_region_occ_y = 02788 (float **)alloc_matrix(0, num_regions - 1, 0, num_regions - 1, 02789 sizeof(float)); 02790 } 02791 else 02792 { /* Shouldn't use them; crash hard if I do! */ 02793 *old_region_occ_x = NULL; 02794 *old_region_occ_y = NULL; 02795 } 02796 02797 if(place_cost_type == LINEAR_CONG) 02798 { 02799 alloc_and_load_for_fast_cost_update(place_cost_exp); 02800 } 02801 } 02802 02803 /** Allocates memory for the regional occupancy, cost, etc. counts 02804 * kept when we're using the NONLINEAR_CONG placement cost 02805 * function. 02806 */ 02807 static void 02808 alloc_place_regions(int num_regions) 02809 { 02810 02811 place_region_x = 02812 (struct s_place_region **)alloc_matrix(0, num_regions - 1, 0, 02813 num_regions - 1, 02814 sizeof(struct s_place_region)); 02815 02816 place_region_y = 02817 (struct s_place_region **)alloc_matrix(0, num_regions - 1, 0, 02818 num_regions - 1, 02819 sizeof(struct s_place_region)); 02820 02821 place_region_bounds_x = (float *)my_malloc((num_regions + 1) * 02822 sizeof(float)); 02823 02824 place_region_bounds_y = (float *)my_malloc((num_regions + 1) * 02825 sizeof(float)); 02826 } 02827 02828 /** Loads the capacity values in each direction for each of the placement 02829 * regions. The chip is divided into a num_regions x num_regions array. 02830 */ 02831 static void 02832 load_place_regions(int num_regions) 02833 { 02834 02835 int i, j, low_block, high_block, rnum; 02836 float low_lim, high_lim, capacity, fac, block_capacity; 02837 float len_fac, height_fac; 02838 02839 /* First load up horizontal channel capacities. */ 02840 02841 for(j = 0; j < num_regions; j++) 02842 { 02843 capacity = 0.; 02844 low_lim = (float)j / (float)num_regions *ny + 1.; 02845 high_lim = (float)(j + 1) / (float)num_regions *ny; 02846 02847 low_block = floor(low_lim); 02848 low_block = max(1, low_block); /* Watch for weird roundoff effects. */ 02849 high_block = ceil(high_lim); 02850 high_block = min(high_block, ny); 02851 02852 block_capacity = (chan_width_x[low_block - 1] + 02853 chan_width_x[low_block]) / 2.; 02854 if(low_block == 1) 02855 block_capacity += chan_width_x[0] / 2.; 02856 02857 fac = 1. - (low_lim - low_block); 02858 capacity += fac * block_capacity; 02859 02860 for(rnum = low_block + 1; rnum < high_block; rnum++) 02861 { 02862 block_capacity = 02863 (chan_width_x[rnum - 1] + chan_width_x[rnum]) / 2.; 02864 capacity += block_capacity; 02865 } 02866 02867 block_capacity = (chan_width_x[high_block - 1] + 02868 chan_width_x[high_block]) / 2.; 02869 if(high_block == ny) 02870 block_capacity += chan_width_x[ny] / 2.; 02871 02872 fac = 1. - (high_block - high_lim); 02873 capacity += fac * block_capacity; 02874 02875 for(i = 0; i < num_regions; i++) 02876 { 02877 place_region_x[i][j].capacity = capacity; 02878 place_region_x[i][j].inv_capacity = 1. / capacity; 02879 place_region_x[i][j].occupancy = 0.; 02880 place_region_x[i][j].cost = 0.; 02881 } 02882 } 02883 02884 /* Now load vertical channel capacities. */ 02885 02886 for(i = 0; i < num_regions; i++) 02887 { 02888 capacity = 0.; 02889 low_lim = (float)i / (float)num_regions *nx + 1.; 02890 high_lim = (float)(i + 1) / (float)num_regions *nx; 02891 02892 low_block = floor(low_lim); 02893 low_block = max(1, low_block); /* Watch for weird roundoff effects. */ 02894 high_block = ceil(high_lim); 02895 high_block = min(high_block, nx); 02896 02897 block_capacity = (chan_width_y[low_block - 1] + 02898 chan_width_y[low_block]) / 2.; 02899 if(low_block == 1) 02900 block_capacity += chan_width_y[0] / 2.; 02901 02902 fac = 1. - (low_lim - low_block); 02903 capacity += fac * block_capacity; 02904 02905 for(rnum = low_block + 1; rnum < high_block; rnum++) 02906 { 02907 block_capacity = 02908 (chan_width_y[rnum - 1] + chan_width_y[rnum]) / 2.; 02909 capacity += block_capacity; 02910 } 02911 02912 block_capacity = (chan_width_y[high_block - 1] + 02913 chan_width_y[high_block]) / 2.; 02914 if(high_block == nx) 02915 block_capacity += chan_width_y[nx] / 2.; 02916 02917 fac = 1. - (high_block - high_lim); 02918 capacity += fac * block_capacity; 02919 02920 for(j = 0; j < num_regions; j++) 02921 { 02922 place_region_y[i][j].capacity = capacity; 02923 place_region_y[i][j].inv_capacity = 1. / capacity; 02924 place_region_y[i][j].occupancy = 0.; 02925 place_region_y[i][j].cost = 0.; 02926 } 02927 } 02928 02929 /* Finally set up the arrays indicating the limits of each of the * 02930 * placement subregions. */ 02931 02932 len_fac = (float)nx / (float)num_regions; 02933 height_fac = (float)ny / (float)num_regions; 02934 02935 place_region_bounds_x[0] = 0.5; 02936 place_region_bounds_y[0] = 0.5; 02937 02938 for(i = 1; i <= num_regions; i++) 02939 { 02940 place_region_bounds_x[i] = place_region_bounds_x[i - 1] + len_fac; 02941 place_region_bounds_y[i] = 02942 place_region_bounds_y[i - 1] + height_fac; 02943 } 02944 } 02945 02946 02947 /** This routine finds the bounding box of each net from scratch (i.e. 02948 * from only the block location information). It updates both the 02949 * coordinate and number of blocks on each edge information. It 02950 * should only be called when the bounding box information is not valid. 02951 */ 02952 static void 02953 get_bb_from_scratch(int inet, 02954 struct s_bb *coords, 02955 struct s_bb *num_on_edges) 02956 { 02957 02958 int ipin, bnum, x, y, xmin, xmax, ymin, ymax; 02959 int xmin_edge, xmax_edge, ymin_edge, ymax_edge; 02960 int n_pins; 02961 02962 n_pins = clb_net[inet].num_sinks + 1; 02963 02964 x = block[clb_net[inet].node_block[0]].x; 02965 y = block[clb_net[inet].node_block[0]].y + block[clb_net[inet].node_block[0]].type->pin_height[clb_net[inet].node_block_pin[0]]; 02966 02967 x = max(min(x, nx), 1); 02968 y = max(min(y, ny), 1); 02969 02970 xmin = x; 02971 ymin = y; 02972 xmax = x; 02973 ymax = y; 02974 xmin_edge = 1; 02975 ymin_edge = 1; 02976 xmax_edge = 1; 02977 ymax_edge = 1; 02978 02979 for(ipin = 1; ipin < n_pins; ipin++) 02980 { 02981 bnum = clb_net[inet].node_block[ipin]; 02982 x = block[bnum].x; 02983 y = block[bnum].y + block[clb_net[inet].node_block[ipin]].type->pin_height[clb_net[inet].node_block_pin[ipin]]; 02984 02985 /* Code below counts IO blocks as being within the 1..nx, 1..ny clb array. * 02986 * This is because channels do not go out of the 0..nx, 0..ny range, and * 02987 * I always take all channels impinging on the bounding box to be within * 02988 * that bounding box. Hence, this "movement" of IO blocks does not affect * 02989 * the which channels are included within the bounding box, and it * 02990 * simplifies the code a lot. */ 02991 02992 x = max(min(x, nx), 1); 02993 y = max(min(y, ny), 1); 02994 02995 if(x == xmin) 02996 { 02997 xmin_edge++; 02998 } 02999 if(x == xmax) 03000 { /* Recall that xmin could equal xmax -- don't use else */ 03001 xmax_edge++; 03002 } 03003 else if(x < xmin) 03004 { 03005 xmin = x; 03006 xmin_edge = 1; 03007 } 03008 else if(x > xmax) 03009 { 03010 xmax = x; 03011 xmax_edge = 1; 03012 } 03013 03014 if(y == ymin) 03015 { 03016 ymin_edge++; 03017 } 03018 if(y == ymax) 03019 { 03020 ymax_edge++; 03021 } 03022 else if(y < ymin) 03023 { 03024 ymin = y; 03025 ymin_edge = 1; 03026 } 03027 else if(y > ymax) 03028 { 03029 ymax = y; 03030 ymax_edge = 1; 03031 } 03032 } 03033 03034 /* Copy the coordinates and number on edges information into the proper * 03035 * structures. */ 03036 03037 coords->xmin = xmin; 03038 coords->xmax = xmax; 03039 coords->ymin = ymin; 03040 coords->ymax = ymax; 03041 03042 num_on_edges->xmin = xmin_edge; 03043 num_on_edges->xmax = xmax_edge; 03044 num_on_edges->ymin = ymin_edge; 03045 num_on_edges->ymax = ymax_edge; 03046 } 03047 03048 03049 /** WMF: Finds the estimate of wirelength due to one net by looking at 03050 * its coordinate bounding box. 03051 */ 03052 static double 03053 get_net_wirelength_estimate(int inet, 03054 struct s_bb *bbptr) 03055 { 03056 03057 double ncost, crossing; 03058 03059 /* Get the expected "crossing count" of a net, based on its number * 03060 * of pins. Extrapolate for very large nets. */ 03061 03062 if(((clb_net[inet].num_sinks + 1) > 50) && ((clb_net[inet].num_sinks + 1) < 85)) 03063 { 03064 crossing = 2.7933 + 0.02616 * ((clb_net[inet].num_sinks + 1) - 50); 03065 } 03066 else if((clb_net[inet].num_sinks + 1) >= 85) 03067 { 03068 crossing = 03069 2.7933 + 0.011 * (clb_net[inet].num_sinks + 1) - 03070 0.0000018 * (clb_net[inet].num_sinks + 1) * (clb_net[inet].num_sinks + 03071 1); 03072 } 03073 else 03074 { 03075 crossing = cross_count[(clb_net[inet].num_sinks + 1) - 1]; 03076 } 03077 03078 /* Could insert a check for xmin == xmax. In that case, assume * 03079 * connection will be made with no bends and hence no x-cost. * 03080 * Same thing for y-cost. */ 03081 03082 /* Cost = wire length along channel * cross_count / average * 03083 * channel capacity. Do this for x, then y direction and add. */ 03084 03085 ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing; 03086 03087 ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing; 03088 03089 return (ncost); 03090 } 03091 03092 /** Finds the cost due to one net by looking at its coordinate bounding 03093 * box. 03094 */ 03095 static float 03096 get_net_cost(int inet, 03097 struct s_bb *bbptr) 03098 { 03099 03100 float ncost, crossing; 03101 03102 /* Get the expected "crossing count" of a net, based on its number * 03103 * of pins. Extrapolate for very large nets. */ 03104 03105 if((clb_net[inet].num_sinks + 1) > 50) 03106 { 03107 crossing = 2.7933 + 0.02616 * ((clb_net[inet].num_sinks + 1) - 50); 03108 /* crossing = 3.0; Old value */ 03109 } 03110 else 03111 { 03112 crossing = cross_count[(clb_net[inet].num_sinks + 1) - 1]; 03113 } 03114 03115 /* Could insert a check for xmin == xmax. In that case, assume * 03116 * connection will be made with no bends and hence no x-cost. * 03117 * Same thing for y-cost. */ 03118 03119 /* Cost = wire length along channel * cross_count / average * 03120 * channel capacity. Do this for x, then y direction and add. */ 03121 03122 ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing * 03123 chanx_place_cost_fac[bbptr->ymax][bbptr->ymin - 1]; 03124 03125 ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing * 03126 chany_place_cost_fac[bbptr->xmax][bbptr->xmin - 1]; 03127 03128 return (ncost); 03129 } 03130 03131 /** Finds the bounding box of a net and stores its coordinates in the 03132 * bb_coord_new data structure. This routine should only be called 03133 * for small nets, since it does not determine enough information for 03134 * the bounding box to be updated incrementally later. 03135 * Currently assumes channels on both sides of the CLBs forming the 03136 * edges of the bounding box can be used. Essentially, I am assuming 03137 * the pins always lie on the outside of the bounding box. 03138 */ 03139 static void 03140 get_non_updateable_bb(int inet, 03141 struct s_bb *bb_coord_new) 03142 { 03143 03144 int k, xmax, ymax, xmin, ymin, x, y; 03145 03146 x = block[clb_net[inet].node_block[0]].x; 03147 y = block[clb_net[inet].node_block[0]].y + block[clb_net[inet].node_block[0]].type->pin_height[clb_net[inet].node_block_pin[0]]; 03148 03149 xmin = x; 03150 ymin = y; 03151 xmax = x; 03152 ymax = y; 03153 03154 for(k = 1; k < (clb_net[inet].num_sinks + 1); k++) 03155 { 03156 x = block[clb_net[inet].node_block[k]].x; 03157 y = block[clb_net[inet].node_block[k]].y + block[clb_net[inet].node_block[k]].type->pin_height[clb_net[inet].node_block_pin[k]]; 03158 03159 if(x < xmin) 03160 { 03161 xmin = x; 03162 } 03163 else if(x > xmax) 03164 { 03165 xmax = x; 03166 } 03167 03168 if(y < ymin) 03169 { 03170 ymin = y; 03171 } 03172 else if(y > ymax) 03173 { 03174 ymax = y; 03175 } 03176 } 03177 03178 /* Now I've found the coordinates of the bounding box. There are no * 03179 * channels beyond nx and ny, so I want to clip to that. As well, * 03180 * since I'll always include the channel immediately below and the * 03181 * channel immediately to the left of the bounding box, I want to * 03182 * clip to 1 in both directions as well (since minimum channel index * 03183 * is 0). See route.c for a channel diagram. */ 03184 03185 bb_coord_new->xmin = max(min(xmin, nx), 1); 03186 bb_coord_new->ymin = max(min(ymin, ny), 1); 03187 bb_coord_new->xmax = max(min(xmax, nx), 1); 03188 bb_coord_new->ymax = max(min(ymax, ny), 1); 03189 } 03190 03191 /** Updates the bounding box of a net by storing its coordinates in 03192 * the bb_coord_new data structure and the number of blocks on each 03193 * edge in the bb_edge_new data structure. This routine should only 03194 * be called for large nets, since it has some overhead relative to 03195 * just doing a brute force bounding box calculation. The bounding 03196 * box coordinate and edge information for inet must be valid before 03197 * this routine is called. 03198 * Currently assumes channels on both sides of the CLBs forming the 03199 * edges of the bounding box can be used. Essentially, I am assuming 03200 * the pins always lie on the outside of the bounding box. 03201 */ 03202 static void 03203 update_bb(int inet, 03204 struct s_bb *bb_coord_new, 03205 struct s_bb *bb_edge_new, 03206 int xold, 03207 int yold, 03208 int xnew, 03209 int ynew) 03210 { 03211 03212 /* IO blocks are considered to be one cell in for simplicity. */ 03213 03214 xnew = max(min(xnew, nx), 1); 03215 ynew = max(min(ynew, ny), 1); 03216 xold = max(min(xold, nx), 1); 03217 yold = max(min(yold, ny), 1); 03218 03219 /* Check if I can update the bounding box incrementally. */ 03220 03221 if(xnew < xold) 03222 { /* Move to left. */ 03223 03224 /* Update the xmax fields for coordinates and number of edges first. */ 03225 03226 if(xold == bb_coords[inet].xmax) 03227 { /* Old position at xmax. */ 03228 if(bb_num_on_edges[inet].xmax == 1) 03229 { 03230 get_bb_from_scratch(inet, bb_coord_new, 03231 bb_edge_new); 03232 return; 03233 } 03234 else 03235 { 03236 bb_edge_new->xmax = 03237 bb_num_on_edges[inet].xmax - 1; 03238 bb_coord_new->xmax = bb_coords[inet].xmax; 03239 } 03240 } 03241 03242 else 03243 { /* Move to left, old postion was not at xmax. */ 03244 bb_coord_new->xmax = bb_coords[inet].xmax; 03245 bb_edge_new->xmax = bb_num_on_edges[inet].xmax; 03246 } 03247 03248 /* Now do the xmin fields for coordinates and number of edges. */ 03249 03250 if(xnew < bb_coords[inet].xmin) 03251 { /* Moved past xmin */ 03252 bb_coord_new->xmin = xnew; 03253 bb_edge_new->xmin = 1; 03254 } 03255 03256 else if(xnew == bb_coords[inet].xmin) 03257 { /* Moved to xmin */ 03258 bb_coord_new->xmin = xnew; 03259 bb_edge_new->xmin = bb_num_on_edges[inet].xmin + 1; 03260 } 03261 03262 else 03263 { /* Xmin unchanged. */ 03264 bb_coord_new->xmin = bb_coords[inet].xmin; 03265 bb_edge_new->xmin = bb_num_on_edges[inet].xmin; 03266 } 03267 } 03268 03269 /* End of move to left case. */ 03270 else if(xnew > xold) 03271 { /* Move to right. */ 03272 03273 /* Update the xmin fields for coordinates and number of edges first. */ 03274 03275 if(xold == bb_coords[inet].xmin) 03276 { /* Old position at xmin. */ 03277 if(bb_num_on_edges[inet].xmin == 1) 03278 { 03279 get_bb_from_scratch(inet, bb_coord_new, 03280 bb_edge_new); 03281 return; 03282 } 03283 else 03284 { 03285 bb_edge_new->xmin = 03286 bb_num_on_edges[inet].xmin - 1; 03287 bb_coord_new->xmin = bb_coords[inet].xmin; 03288 } 03289 } 03290 03291 else 03292 { /* Move to right, old position was not at xmin. */ 03293 bb_coord_new->xmin = bb_coords[inet].xmin; 03294 bb_edge_new->xmin = bb_num_on_edges[inet].xmin; 03295 } 03296 03297 /* Now do the xmax fields for coordinates and number of edges. */ 03298 03299 if(xnew > bb_coords[inet].xmax) 03300 { /* Moved past xmax. */ 03301 bb_coord_new->xmax = xnew; 03302 bb_edge_new->xmax = 1; 03303 } 03304 03305 else if(xnew == bb_coords[inet].xmax) 03306 { /* Moved to xmax */ 03307 bb_coord_new->xmax = xnew; 03308 bb_edge_new->xmax = bb_num_on_edges[inet].xmax + 1; 03309 } 03310 03311 else 03312 { /* Xmax unchanged. */ 03313 bb_coord_new->xmax = bb_coords[inet].xmax; 03314 bb_edge_new->xmax = bb_num_on_edges[inet].xmax; 03315 } 03316 } 03317 /* End of move to right case. */ 03318 else 03319 { /* xnew == xold -- no x motion. */ 03320 bb_coord_new->xmin = bb_coords[inet].xmin; 03321 bb_coord_new->xmax = bb_coords[inet].xmax; 03322 bb_edge_new->xmin = bb_num_on_edges[inet].xmin; 03323 bb_edge_new->xmax = bb_num_on_edges[inet].xmax; 03324 } 03325 03326 /* Now account for the y-direction motion. */ 03327 03328 if(ynew < yold) 03329 { /* Move down. */ 03330 03331 /* Update the ymax fields for coordinates and number of edges first. */ 03332 03333 if(yold == bb_coords[inet].ymax) 03334 { /* Old position at ymax. */ 03335 if(bb_num_on_edges[inet].ymax == 1) 03336 { 03337 get_bb_from_scratch(inet, bb_coord_new, 03338 bb_edge_new); 03339 return; 03340 } 03341 else 03342 { 03343 bb_edge_new->ymax = 03344 bb_num_on_edges[inet].ymax - 1; 03345 bb_coord_new->ymax = bb_coords[inet].ymax; 03346 } 03347 } 03348 03349 else 03350 { /* Move down, old postion was not at ymax. */ 03351 bb_coord_new->ymax = bb_coords[inet].ymax; 03352 bb_edge_new->ymax = bb_num_on_edges[inet].ymax; 03353 } 03354 03355 /* Now do the ymin fields for coordinates and number of edges. */ 03356 03357 if(ynew < bb_coords[inet].ymin) 03358 { /* Moved past ymin */ 03359 bb_coord_new->ymin = ynew; 03360 bb_edge_new->ymin = 1; 03361 } 03362 03363 else if(ynew == bb_coords[inet].ymin) 03364 { /* Moved to ymin */ 03365 bb_coord_new->ymin = ynew; 03366 bb_edge_new->ymin = bb_num_on_edges[inet].ymin + 1; 03367 } 03368 03369 else 03370 { /* ymin unchanged. */ 03371 bb_coord_new->ymin = bb_coords[inet].ymin; 03372 bb_edge_new->ymin = bb_num_on_edges[inet].ymin; 03373 } 03374 } 03375 /* End of move down case. */ 03376 else if(ynew > yold) 03377 { /* Moved up. */ 03378 03379 /* Update the ymin fields for coordinates and number of edges first. */ 03380 03381 if(yold == bb_coords[inet].ymin) 03382 { /* Old position at ymin. */ 03383 if(bb_num_on_edges[inet].ymin == 1) 03384 { 03385 get_bb_from_scratch(inet, bb_coord_new, 03386 bb_edge_new); 03387 return; 03388 } 03389 else 03390 { 03391 bb_edge_new->ymin = 03392 bb_num_on_edges[inet].ymin - 1; 03393 bb_coord_new->ymin = bb_coords[inet].ymin; 03394 } 03395 } 03396 03397 else 03398 { /* Moved up, old position was not at ymin. */ 03399 bb_coord_new->ymin = bb_coords[inet].ymin; 03400 bb_edge_new->ymin = bb_num_on_edges[inet].ymin; 03401 } 03402 03403 /* Now do the ymax fields for coordinates and number of edges. */ 03404 03405 if(ynew > bb_coords[inet].ymax) 03406 { /* Moved past ymax. */ 03407 bb_coord_new->ymax = ynew; 03408 bb_edge_new->ymax = 1; 03409 } 03410 03411 else if(ynew == bb_coords[inet].ymax) 03412 { /* Moved to ymax */ 03413 bb_coord_new->ymax = ynew; 03414 bb_edge_new->ymax = bb_num_on_edges[inet].ymax + 1; 03415 } 03416 03417 else 03418 { /* ymax unchanged. */ 03419 bb_coord_new->ymax = bb_coords[inet].ymax; 03420 bb_edge_new->ymax = bb_num_on_edges[inet].ymax; 03421 } 03422 } 03423 /* End of move up case. */ 03424 else 03425 { /* ynew == yold -- no y motion. */ 03426 bb_coord_new->ymin = bb_coords[inet].ymin; 03427 bb_coord_new->ymax = bb_coords[inet].ymax; 03428 bb_edge_new->ymin = bb_num_on_edges[inet].ymin; 03429 bb_edge_new->ymax = bb_num_on_edges[inet].ymax; 03430 } 03431 } 03432 03433 03434 /** Randomly places the blocks to create an initial placement. */ 03435 static void 03436 initial_placement(enum e_pad_loc_type pad_loc_type, 03437 char *pad_loc_file) 03438 { 03439 03440 struct s_pos 03441 { 03442 int x; 03443 int y; 03444 int z; 03445 } 03446 **pos; /* [0..num_types-1][0..type_tsize - 1] */ 03447 int i, j, k, iblk, choice, type_index, x, y, z; 03448 int *count, *index; /* [0..num_types-1] */ 03449 03450 pos = (struct s_pos **)my_malloc(num_types * sizeof(struct s_pos *)); 03451 count = (int *)my_calloc(num_types, sizeof(int)); 03452 index = (int *)my_calloc(num_types, sizeof(int)); 03453 03454 /* Initialize all occupancy to zero. */ 03455 03456 for(i = 0; i <= nx + 1; i++) 03457 { 03458 for(j = 0; j <= ny + 1; j++) 03459 { 03460 grid[i][j].usage = 0; 03461 for(k = 0; k < grid[i][j].type->capacity; k++) 03462 { 03463 grid[i][j].blocks[k] = EMPTY; 03464 if(grid[i][j].offset == 0) 03465 { 03466 count[grid[i][j].type->index]++; 03467 } 03468 } 03469 } 03470 } 03471 03472 for(i = 0; i < num_types; i++) 03473 { 03474 pos[i] = 03475 (struct s_pos *)my_malloc(count[i] * sizeof(struct s_pos)); 03476 } 03477 03478 for(i = 0; i <= nx + 1; i++) 03479 { 03480 for(j = 0; j <= ny + 1; j++) 03481 { 03482 for(k = 0; k < grid[i][j].type->capacity; k++) 03483 { 03484 if(grid[i][j].offset == 0) 03485 { 03486 type_index = grid[i][j].type->index; 03487 pos[type_index][index[type_index]].x = i; 03488 pos[type_index][index[type_index]].y = j; 03489 pos[type_index][index[type_index]].z = k; 03490 index[type_index]++; 03491 } 03492 } 03493 } 03494 } 03495 03496 for(iblk = 0; iblk < num_blocks; iblk++) 03497 { 03498 /* Don't do IOs if the user specifies IOs */ 03499 if(!(block[iblk].type == IO_TYPE && pad_loc_type == USER)) 03500 { 03501 type_index = block[iblk].type->index; 03502 assert(count[type_index] > 0); 03503 choice = my_irand(count[type_index] - 1); 03504 x = pos[type_index][choice].x; 03505 y = pos[type_index][choice].y; 03506 z = pos[type_index][choice].z; 03507 grid[x][y].blocks[z] = iblk; 03508 grid[x][y].usage++; 03509 03510 /* Ensure randomizer doesn't pick this block again */ 03511 pos[type_index][choice] = pos[type_index][count[type_index] - 1]; /* overwrite used block position */ 03512 count[type_index]--; 03513 } 03514 } 03515 03516 if(pad_loc_type == USER) 03517 { 03518 read_user_pad_loc(pad_loc_file); 03519 } 03520 03521 /* All the blocks are placed now. Make the block array agree with the * 03522 * clb array. */ 03523 03524 for(i = 0; i <= (nx + 1); i++) 03525 { 03526 for(j = 0; j <= (ny + 1); j++) 03527 { 03528 for(k = 0; k < grid[i][j].type->capacity; k++) 03529 { 03530 assert(grid[i][j].blocks != NULL); 03531 iblk = grid[i][j].blocks[k]; 03532 03533 if(iblk != EMPTY) 03534 { 03535 block[iblk].x = i; 03536 block[iblk].y = j; 03537 block[iblk].z = k; 03538 } 03539 } 03540 } 03541 } 03542 03543 #ifdef VERBOSE 03544 printf("At end of initial_placement.\n"); 03545 dump_clbs(); 03546 #endif 03547 for(i = 0; i < num_types; i++) 03548 { 03549 free(pos[i]); 03550 } 03551 free(pos); /* Free the mapping list */ 03552 free(index); 03553 free(count); 03554 } 03555 03556 /** Frees the structures used to speed up evaluation of the nonlinear 03557 * congestion cost function. 03558 */ 03559 static void 03560 free_fast_cost_update_structs(void) 03561 { 03562 03563 int i; 03564 03565 for(i = 0; i <= ny; i++) 03566 free(chanx_place_cost_fac[i]); 03567 03568 free(chanx_place_cost_fac); 03569 03570 for(i = 0; i <= nx; i++) 03571 free(chany_place_cost_fac[i]); 03572 03573 free(chany_place_cost_fac); 03574 } 03575 03576 /** Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac 03577 * arrays with the inverse of the average number of tracks per channel 03578 * between [subhigh] and [sublow]. This is only useful for the cost 03579 * function that takes the length of the net bounding box in each 03580 * dimension divided by the average number of tracks in that direction. 03581 * For other cost functions, you don't have to bother calling this 03582 * routine; when using the cost function described above, however, you 03583 * must always call this routine after you call init_chan and before 03584 * you do any placement cost determination. The place_cost_exp factor 03585 * specifies to what power the width of the channel should be taken -- 03586 * larger numbers make narrower channels more expensive. 03587 */ 03588 static void 03589 alloc_and_load_for_fast_cost_update(float place_cost_exp) 03590 { 03591 03592 int low, high, i; 03593 03594 /* Access arrays below as chan?_place_cost_fac[subhigh][sublow]. Since * 03595 * subhigh must be greater than or equal to sublow, we only need to * 03596 * allocate storage for the lower half of a matrix. */ 03597 03598 chanx_place_cost_fac = (float **)my_malloc((ny + 1) * sizeof(float *)); 03599 for(i = 0; i <= ny; i++) 03600 chanx_place_cost_fac[i] = (float *)my_malloc((i + 1) * sizeof(float)); 03601 03602 chany_place_cost_fac = (float **)my_malloc((nx + 1) * sizeof(float *)); 03603 for(i = 0; i <= nx; i++) 03604 chany_place_cost_fac[i] = (float *)my_malloc((i + 1) * sizeof(float)); 03605 03606 03607 /* First compute the number of tracks between channel high and channel * 03608 * low, inclusive, in an efficient manner. */ 03609 03610 chanx_place_cost_fac[0][0] = chan_width_x[0]; 03611 03612 for(high = 1; high <= ny; high++) 03613 { 03614 chanx_place_cost_fac[high][high] = chan_width_x[high]; 03615 for(low = 0; low < high; low++) 03616 { 03617 chanx_place_cost_fac[high][low] = 03618 chanx_place_cost_fac[high - 1][low] + 03619 chan_width_x[high]; 03620 } 03621 } 03622 03623 /* Now compute the inverse of the average number of tracks per channel * 03624 * between high and low. The cost function divides by the average * 03625 * number of tracks per channel, so by storing the inverse I convert * 03626 * this to a faster multiplication. Take this final number to the * 03627 * place_cost_exp power -- numbers other than one mean this is no * 03628 * longer a simple "average number of tracks"; it is some power of * 03629 * that, allowing greater penalization of narrow channels. */ 03630 03631 for(high = 0; high <= ny; high++) 03632 for(low = 0; low <= high; low++) 03633 { 03634 chanx_place_cost_fac[high][low] = (high - low + 1.) / 03635 chanx_place_cost_fac[high][low]; 03636 chanx_place_cost_fac[high][low] = 03637 pow((double)chanx_place_cost_fac[high][low], 03638 (double)place_cost_exp); 03639 } 03640 03641 03642 /* Now do the same thing for the y-directed channels. First get the * 03643 * number of tracks between channel high and channel low, inclusive. */ 03644 03645 chany_place_cost_fac[0][0] = chan_width_y[0]; 03646 03647 for(high = 1; high <= nx; high++) 03648 { 03649 chany_place_cost_fac[high][high] = chan_width_y[high]; 03650 for(low = 0; low < high; low++) 03651 { 03652 chany_place_cost_fac[high][low] = 03653 chany_place_cost_fac[high - 1][low] + 03654 chan_width_y[high]; 03655 } 03656 } 03657 03658 /* Now compute the inverse of the average number of tracks per channel * 03659 * between high and low. Take to specified power. */ 03660 03661 for(high = 0; high <= nx; high++) 03662 for(low = 0; low <= high; low++) 03663 { 03664 chany_place_cost_fac[high][low] = (high - low + 1.) / 03665 chany_place_cost_fac[high][low]; 03666 chany_place_cost_fac[high][low] = 03667 pow((double)chany_place_cost_fac[high][low], 03668 (double)place_cost_exp); 03669 } 03670 } 03671 03672 /** Checks that the placement has not confused our data structures. 03673 * i.e. the clb and block structures agree about the locations of 03674 * every block, blocks are in legal spots, etc. Also recomputes 03675 * the final placement cost from scratch and makes sure it is 03676 * within roundoff of what we think the cost is. 03677 */ 03678 static void 03679 check_place(float bb_cost, 03680 float timing_cost, 03681 int place_cost_type, 03682 int num_regions, 03683 enum e_place_algorithm place_algorithm, 03684 float delay_cost) 03685 { 03686 03687 static int *bdone; 03688 int i, j, k, error = 0, bnum; 03689 float bb_cost_check; 03690 int usage_check; 03691 float timing_cost_check, delay_cost_check; 03692 03693 bb_cost_check = comp_bb_cost(CHECK, place_cost_type, num_regions); 03694 printf("bb_cost recomputed from scratch is %g.\n", bb_cost_check); 03695 if(fabs(bb_cost_check - bb_cost) > bb_cost * ERROR_TOL) 03696 { 03697 printf 03698 ("Error: bb_cost_check: %g and bb_cost: %g differ in check_place.\n", 03699 bb_cost_check, bb_cost); 03700 error++; 03701 } 03702 03703 if(place_algorithm == NET_TIMING_DRIVEN_PLACE || 03704 place_algorithm == PATH_TIMING_DRIVEN_PLACE) 03705 { 03706 comp_td_costs(&timing_cost_check, &delay_cost_check); 03707 printf("timing_cost recomputed from scratch is %g. \n", 03708 timing_cost_check); 03709 if(fabs(timing_cost_check - timing_cost) > 03710 timing_cost * ERROR_TOL) 03711 { 03712 printf("Error: timing_cost_check: %g and timing_cost: " 03713 "%g differ in check_place.\n", 03714 timing_cost_check, timing_cost); 03715 error++; 03716 } 03717 printf("delay_cost recomputed from scratch is %g. \n", 03718 delay_cost_check); 03719 if(fabs(delay_cost_check - delay_cost) > delay_cost * ERROR_TOL) 03720 { 03721 printf("Error: delay_cost_check: %g and delay_cost: " 03722 "%g differ in check_place.\n", 03723 delay_cost_check, delay_cost); 03724 error++; 03725 } 03726 } 03727 03728 bdone = (int *)my_malloc(num_blocks * sizeof(int)); 03729 for(i = 0; i < num_blocks; i++) 03730 bdone[i] = 0; 03731 03732 /* Step through grid array. Check it against block array. */ 03733 for(i = 0; i <= (nx + 1); i++) 03734 for(j = 0; j <= (ny + 1); j++) 03735 { 03736 if(grid[i][j].usage > grid[i][j].type->capacity) 03737 { 03738 printf 03739 ("Error: block at grid location (%d,%d) overused. " 03740 "Usage is %d\n", i, j, grid[i][j].usage); 03741 error++; 03742 } 03743 usage_check = 0; 03744 for(k = 0; k < grid[i][j].type->capacity; k++) 03745 { 03746 bnum = grid[i][j].blocks[k]; 03747 if(EMPTY == bnum) 03748 continue; 03749 03750 if(block[bnum].type != grid[i][j].type) 03751 { 03752 printf 03753 ("Error: block %d type does not match grid location (%d,%d) type.\n", 03754 bnum, i, j); 03755 error++; 03756 } 03757 if((block[bnum].x != i) || (block[bnum].y != j)) 03758 { 03759 printf 03760 ("Error: block %d location conflicts with grid(%d,%d)" 03761 "data.\n", bnum, i, j); 03762 error++; 03763 } 03764 ++usage_check; 03765 bdone[bnum]++; 03766 } 03767 if(usage_check != grid[i][j].usage) 03768 { 03769 printf 03770 ("Error: Location (%d,%d) usage is %d, but has actual usage %d.\n", 03771 i, j, grid[i][j].usage, usage_check); 03772 } 03773 } 03774 03775 /* Check that every block exists in the grid and block arrays somewhere. */ 03776 for(i = 0; i < num_blocks; i++) 03777 if(bdone[i] != 1) 03778 { 03779 printf 03780 ("Error: block %d listed %d times in data structures.\n", 03781 i, bdone[i]); 03782 error++; 03783 } 03784 free(bdone); 03785 03786 if(error == 0) 03787 { 03788 printf 03789 ("\nCompleted placement consistency check successfully.\n\n"); 03790 #ifdef PRINT_REL_POS_DISTR 03791 print_relative_pos_distr(); 03792 #endif 03793 } 03794 else 03795 { 03796 printf 03797 ("\nCompleted placement consistency check, %d Errors found.\n\n", 03798 error); 03799 printf("Aborting program.\n"); 03800 exit(1); 03801 } 03802 }