00001
00002 #include <stdio.h>
00003 #include <math.h>
00004 #include <assert.h>
00005 #include "util.h"
00006 #include "vpr_types.h"
00007 #include "globals.h"
00008 #include "mst.h"
00009 #include "place.h"
00010 #include "read_place.h"
00011 #include "draw.h"
00012 #include "place_and_route.h"
00013 #include "net_delay.h"
00014 #include "path_delay.h"
00015 #include "timing_place_lookup.h"
00016 #include "timing_place.h"
00017 #include "place_stats.h"
00018
00019
00020
00021 #define SMALL_NET 4
00022
00023
00024
00025
00026
00027
00028
00029
00030 enum cost_methods
00031 { NORMAL, CHECK };
00032
00033 #define FROM 0
00034 #define TO 1
00035 #define FROM_AND_TO 2
00036
00037 #define ERROR_TOL .0025
00038 #define MAX_MOVES_BEFORE_RECOMPUTE 50000
00039
00040
00041
00042
00043
00044
00045 static int *duplicate_pins;
00046
00047
00048
00049
00050 static int **unique_pin_list;
00051
00052
00053
00054 static float *net_cost = NULL, *temp_net_cost = NULL;
00055
00056
00057
00058
00059 static float **point_to_point_timing_cost = NULL;
00060 static float **temp_point_to_point_timing_cost = NULL;
00061
00062
00063
00064
00065
00066 static float **point_to_point_delay_cost = NULL;
00067 static float **temp_point_to_point_delay_cost = NULL;
00068
00069
00070
00071
00072
00073
00074 static int **net_pin_index = NULL;
00075
00076
00077
00078
00079
00080
00081 static struct s_bb *bb_coords = NULL, *bb_num_on_edges = NULL;
00082
00083
00084
00085
00086
00087
00088 static struct s_place_region **place_region_x, **place_region_y;
00089
00090
00091
00092 static float *place_region_bounds_x, *place_region_bounds_y;
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102 static float **chanx_place_cost_fac, **chany_place_cost_fac;
00103
00104
00105
00106
00107
00108 static const float cross_count[50] = {
00109 1.0, 1.0, 1.0, 1.0828, 1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493,
00110 1.4974, 1.5455, 1.5937, 1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519,
00111 1.8924,
00112 1.9288, 1.9652, 2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016,
00113 2.2334,
00114 2.2646, 2.2958, 2.3271, 2.3583, 2.3895, 2.4187, 2.4479, 2.4772, 2.5064,
00115 2.5356,
00116 2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671,
00117 2.7933
00118 };
00119
00120
00121
00122
00123 static void alloc_and_load_unique_pin_list(void);
00124
00125 static void free_unique_pin_list(void);
00126
00127 static void alloc_place_regions(int num_regions);
00128
00129 static void load_place_regions(int num_regions);
00130
00131 static void free_place_regions(int num_regions);
00132
00133 static void alloc_and_load_placement_structs(int place_cost_type,
00134 int num_regions,
00135 float place_cost_exp,
00136 float ***old_region_occ_x,
00137 float ***old_region_occ_y,
00138 struct s_placer_opts
00139 placer_opts);
00140
00141 static void free_placement_structs(int place_cost_type,
00142 int num_regions,
00143 float **old_region_occ_x,
00144 float **old_region_occ_y,
00145 struct s_placer_opts placer_opts);
00146
00147 static void alloc_and_load_for_fast_cost_update(float place_cost_exp);
00148
00149 static void initial_placement(enum e_pad_loc_type pad_loc_type,
00150 char *pad_loc_file);
00151
00152 static float comp_bb_cost(int method,
00153 int place_cost_type,
00154 int num_regions);
00155
00156 static int try_swap(float t,
00157 float *cost,
00158 float *bb_cost,
00159 float *timing_cost,
00160 float rlim,
00161 int place_cost_type,
00162 float **old_region_occ_x,
00163 float **old_region_occ_y,
00164 int num_regions,
00165 boolean fixed_pins,
00166 enum e_place_algorithm place_algorithm,
00167 float timing_tradeoff,
00168 float inverse_prev_bb_cost,
00169 float inverse_prev_timing_cost,
00170 float *delay_cost,
00171 int *x_lookup);
00172
00173 static void check_place(float bb_cost,
00174 float timing_cost,
00175 int place_cost_type,
00176 int num_regions,
00177 enum e_place_algorithm place_algorithm,
00178 float delay_cost);
00179
00180 static float starting_t(float *cost_ptr,
00181 float *bb_cost_ptr,
00182 float *timing_cost_ptr,
00183 int place_cost_type,
00184 float **old_region_occ_x,
00185 float **old_region_occ_y,
00186 int num_regions,
00187 boolean fixed_pins,
00188 struct s_annealing_sched annealing_sched,
00189 int max_moves,
00190 float rlim,
00191 enum e_place_algorithm place_algorithm,
00192 float timing_tradeoff,
00193 float inverse_prev_bb_cost,
00194 float inverse_prev_timing_cost,
00195 float *delay_cost_ptr);
00196
00197
00198 static void update_t(float *t,
00199 float std_dev,
00200 float rlim,
00201 float success_rat,
00202 struct s_annealing_sched annealing_sched);
00203
00204 static void update_rlim(float *rlim,
00205 float success_rat);
00206
00207 static int exit_crit(float t,
00208 float cost,
00209 struct s_annealing_sched annealing_sched);
00210
00211 static int count_connections(void);
00212
00213 static void compute_net_pin_index_values(void);
00214
00215 static double get_std_dev(int n,
00216 double sum_x_squared,
00217 double av_x);
00218
00219 static void free_fast_cost_update_structs(void);
00220
00221 static float recompute_bb_cost(int place_cost_type,
00222 int num_regions);
00223
00224 static float comp_td_point_to_point_delay(int inet,
00225 int ipin);
00226
00227 static void update_td_cost(int b_from,
00228 int b_to,
00229 int num_of_pins);
00230
00231 static void comp_delta_td_cost(int b_from,
00232 int b_to,
00233 int num_of_pins,
00234 float *delta_timing,
00235 float *delta_delay);
00236
00237 static void comp_td_costs(float *timing_cost,
00238 float *connection_delay_sum);
00239
00240 static int assess_swap(float delta_c,
00241 float t);
00242
00243 static boolean find_to(int x_from,
00244 int y_from,
00245 t_type_ptr type,
00246 float rlim,
00247 int *x_lookup,
00248 int *x_to,
00249 int *y_to);
00250
00251 static void get_non_updateable_bb(int inet,
00252 struct s_bb *bb_coord_new);
00253
00254 static void update_bb(int inet,
00255 struct s_bb *bb_coord_new,
00256 struct s_bb *bb_edge_new,
00257 int xold,
00258 int yold,
00259 int xnew,
00260 int ynew);
00261
00262 static int find_affected_nets(int *nets_to_update,
00263 int *net_block_moved,
00264 int b_from,
00265 int b_to,
00266 int num_of_pins);
00267
00268 static float get_net_cost(int inet,
00269 struct s_bb *bb_ptr);
00270
00271 static float nonlinear_cong_cost(int num_regions);
00272
00273 static void update_region_occ(int inet,
00274 struct s_bb *coords,
00275 int add_or_sub,
00276 int num_regions);
00277
00278 static void save_region_occ(float **old_region_occ_x,
00279 float **old_region_occ_y,
00280 int num_regions);
00281
00282 static void restore_region_occ(float **old_region_occ_x,
00283 float **old_region_occ_y,
00284 int num_regions);
00285
00286 static void get_bb_from_scratch(int inet,
00287 struct s_bb *coords,
00288 struct s_bb *num_on_edges);
00289
00290 static double get_net_wirelength_estimate(int inet,
00291 struct s_bb *bbptr);
00292
00293
00294
00295 void
00296 try_place(struct s_placer_opts placer_opts,
00297 struct s_annealing_sched annealing_sched,
00298 t_chan_width_dist chan_width_dist,
00299 struct s_router_opts router_opts,
00300 struct s_det_routing_arch det_routing_arch,
00301 t_segment_inf * segment_inf,
00302 t_timing_inf timing_inf,
00303 t_subblock_data * subblock_data_ptr,
00304 t_mst_edge *** mst)
00305 {
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315 int tot_iter, inner_iter, success_sum;
00316 int move_lim, moves_since_cost_recompute, width_fac;
00317 float t, success_rat, rlim, d_max, est_crit;
00318 float cost, timing_cost, bb_cost, new_bb_cost, new_timing_cost;
00319 float delay_cost, new_delay_cost, place_delay_value;
00320 float inverse_prev_bb_cost, inverse_prev_timing_cost;
00321 float oldt;
00322 double av_cost, av_bb_cost, av_timing_cost, av_delay_cost,
00323 sum_of_squares, std_dev;
00324 float **old_region_occ_x, **old_region_occ_y;
00325 char msg[BUFSIZE];
00326 boolean fixed_pins;
00327 int num_connections;
00328 int inet, ipin, outer_crit_iter_count, inner_crit_iter_count,
00329 inner_recompute_limit;
00330 float **net_slack, **net_delay;
00331 float crit_exponent;
00332 float first_rlim, final_rlim, inverse_delta_rlim;
00333 float **remember_net_delay_original_ptr;
00334
00335 int *x_lookup;
00336
00337
00338 x_lookup = my_malloc(nx * sizeof(int));
00339
00340 remember_net_delay_original_ptr = NULL;
00341
00342 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00343 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE ||
00344 placer_opts.enable_timing_computations)
00345 {
00346
00347 alloc_lookups_and_criticalities(chan_width_dist,
00348 router_opts,
00349 det_routing_arch,
00350 segment_inf,
00351 timing_inf,
00352 *subblock_data_ptr,
00353 &net_delay, &net_slack);
00354
00355 remember_net_delay_original_ptr = net_delay;
00356
00357
00358 #ifdef PRINT_LOWER_BOUND
00359
00360
00361
00362 if(placer_opts.block_dist <= nx)
00363 place_delay_value =
00364 delta_clb_to_clb[placer_opts.block_dist][0];
00365 else if(placer_opts.block_dist <= ny)
00366 place_delay_value =
00367 delta_clb_to_clb[0][placer_opts.block_dist];
00368 else
00369 place_delay_value = delta_clb_to_clb[nx][ny];
00370
00371 printf("\nLower bound assuming delay of %g\n", place_delay_value);
00372
00373 load_constant_net_delay(net_delay, place_delay_value);
00374 load_timing_graph_net_delays(net_delay);
00375 d_max = load_net_slack(net_slack, 0);
00376
00377 #ifdef CREATE_ECHO_FILES
00378 print_critical_path("Placement_Lower_Bound.echo");
00379 print_sink_delays("Placement_Lower_Bound_Sink_Delays.echo");
00380 #endif
00381
00382
00383
00384
00385 load_constant_net_delay(net_delay, 0);
00386 load_timing_graph_net_delays(net_delay);
00387 d_max = load_net_slack(net_slack, 0);
00388
00389 #ifdef CREATE_ECHO_FILES
00390 print_sink_delays("Placement_Logic_Sink_Delays.echo");
00391 #endif
00392 #endif
00393
00394 }
00395
00396 width_fac = placer_opts.place_chan_width;
00397 if(placer_opts.pad_loc_type == FREE)
00398 fixed_pins = FALSE;
00399 else
00400 fixed_pins = TRUE;
00401
00402 init_chan(width_fac, chan_width_dist);
00403
00404 alloc_and_load_placement_structs(placer_opts.place_cost_type,
00405 placer_opts.num_regions,
00406 placer_opts.place_cost_exp,
00407 &old_region_occ_x, &old_region_occ_y,
00408 placer_opts);
00409
00410 initial_placement(placer_opts.pad_loc_type, placer_opts.pad_loc_file);
00411 init_draw_coords((float)width_fac);
00412
00413
00414
00415
00416
00417
00418 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00419 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
00420 {
00421 bb_cost = comp_bb_cost(NORMAL, placer_opts.place_cost_type,
00422 placer_opts.num_regions);
00423
00424 crit_exponent = placer_opts.td_place_exp_first;
00425
00426 compute_net_pin_index_values();
00427
00428 num_connections = count_connections();
00429 printf
00430 ("\nThere are %d point to point connections in this circuit\n\n",
00431 num_connections);
00432
00433 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE)
00434 {
00435 for(inet = 0; inet < num_nets; inet++)
00436 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++)
00437 timing_place_crit[inet][ipin] = 0;
00438
00439 comp_td_costs(&timing_cost, &delay_cost);
00440
00441
00442 place_delay_value = delay_cost / num_connections;
00443 load_constant_net_delay(net_delay, place_delay_value);
00444
00445 }
00446 else
00447 place_delay_value = 0;
00448
00449
00450 if(placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
00451 {
00452 net_delay = point_to_point_delay_cost;
00453
00454
00455
00456
00457 }
00458
00459
00460 load_timing_graph_net_delays(net_delay);
00461 d_max = load_net_slack(net_slack, 0);
00462 load_criticalities(placer_opts, net_slack, d_max, crit_exponent);
00463 outer_crit_iter_count = 1;
00464
00465
00466 comp_td_costs(&timing_cost, &delay_cost);
00467
00468 inverse_prev_timing_cost = 1 / timing_cost;
00469 inverse_prev_bb_cost = 1 / bb_cost;
00470 cost = 1;
00471
00472
00473 }
00474 else
00475 {
00476 cost = bb_cost = comp_bb_cost(NORMAL, placer_opts.place_cost_type,
00477 placer_opts.num_regions);
00478 timing_cost = 0;
00479 delay_cost = 0;
00480 place_delay_value = 0;
00481 outer_crit_iter_count = 0;
00482 num_connections = 0;
00483 d_max = 0;
00484 crit_exponent = 0;
00485
00486 inverse_prev_timing_cost = 0;
00487 inverse_prev_bb_cost = 0;
00488 }
00489
00490 move_lim = (int)(annealing_sched.inner_num * pow(num_blocks, 1.3333));
00491
00492 if(placer_opts.inner_loop_recompute_divider != 0)
00493 inner_recompute_limit = (int)(0.5 + (float)move_lim /
00494 (float)placer_opts.
00495 inner_loop_recompute_divider);
00496 else
00497 inner_recompute_limit = move_lim + 1;
00498
00499
00500
00501
00502
00503
00504
00505 if(move_lim <= 0)
00506 move_lim = 1;
00507
00508 rlim = (float)max(nx, ny);
00509
00510 first_rlim = rlim;
00511 final_rlim = 1;
00512 inverse_delta_rlim = 1 / (first_rlim - final_rlim);
00513
00514 t = starting_t(&cost, &bb_cost, &timing_cost,
00515 placer_opts.place_cost_type,
00516 old_region_occ_x, old_region_occ_y,
00517 placer_opts.num_regions, fixed_pins, annealing_sched,
00518 move_lim, rlim, placer_opts.place_algorithm,
00519 placer_opts.timing_tradeoff, inverse_prev_bb_cost,
00520 inverse_prev_timing_cost, &delay_cost);
00521 tot_iter = 0;
00522 moves_since_cost_recompute = 0;
00523 printf
00524 ("Initial Placement Cost: %g bb_cost: %g td_cost: %g delay_cost: %g\n\n",
00525 cost, bb_cost, timing_cost, delay_cost);
00526
00527 #ifndef SPEC
00528 printf
00529 ("%11s %10s %11s %11s %11s %11s %11s %9s %8s %7s %7s %10s %7s\n",
00530 "T", "Cost", "Av. BB Cost", "Av. TD Cost", "Av Tot Del",
00531 "P to P Del", "d_max", "Ac Rate", "Std Dev", "R limit", "Exp",
00532 "Tot. Moves", "Alpha");
00533 printf
00534 ("%11s %10s %11s %11s %11s %11s %11s %9s %8s %7s %7s %10s %7s\n",
00535 "--------", "----------", "-----------", "-----------",
00536 "---------", "----------", "-----", "-------", "-------",
00537 "-------", "-------", "----------", "-----");
00538 #endif
00539
00540 sprintf(msg,
00541 "Initial Placement. Cost: %g BB Cost: %g TD Cost %g Delay Cost: %g "
00542 "\t d_max %g Channel Factor: %d", cost, bb_cost, timing_cost,
00543 delay_cost, d_max, width_fac);
00544 update_screen(MAJOR, msg, PLACEMENT, FALSE);
00545
00546 while(exit_crit(t, cost, annealing_sched) == 0)
00547 {
00548
00549 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00550 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
00551 {
00552 cost = 1;
00553 }
00554
00555 av_cost = 0.;
00556 av_bb_cost = 0.;
00557 av_delay_cost = 0.;
00558 av_timing_cost = 0.;
00559 sum_of_squares = 0.;
00560 success_sum = 0;
00561
00562 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00563 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
00564 {
00565
00566 if(outer_crit_iter_count >=
00567 placer_opts.recompute_crit_iter
00568 || placer_opts.inner_loop_recompute_divider != 0)
00569 {
00570 #ifdef VERBOSE
00571 printf("Outer Loop Recompute Criticalities\n");
00572 #endif
00573 place_delay_value = delay_cost / num_connections;
00574
00575 if(placer_opts.place_algorithm ==
00576 NET_TIMING_DRIVEN_PLACE)
00577 load_constant_net_delay(net_delay,
00578 place_delay_value);
00579
00580
00581
00582 load_timing_graph_net_delays(net_delay);
00583 d_max = load_net_slack(net_slack, 0);
00584 load_criticalities(placer_opts, net_slack, d_max,
00585 crit_exponent);
00586
00587 comp_td_costs(&timing_cost, &delay_cost);
00588 outer_crit_iter_count = 0;
00589 }
00590 outer_crit_iter_count++;
00591
00592
00593
00594 inverse_prev_bb_cost = 1 / bb_cost;
00595 inverse_prev_timing_cost = 1 / timing_cost;
00596 }
00597
00598 inner_crit_iter_count = 1;
00599
00600 for(inner_iter = 0; inner_iter < move_lim; inner_iter++)
00601 {
00602 if(try_swap(t, &cost, &bb_cost, &timing_cost,
00603 rlim, placer_opts.place_cost_type,
00604 old_region_occ_x, old_region_occ_y,
00605 placer_opts.num_regions, fixed_pins,
00606 placer_opts.place_algorithm,
00607 placer_opts.timing_tradeoff,
00608 inverse_prev_bb_cost,
00609 inverse_prev_timing_cost, &delay_cost,
00610 x_lookup) == 1)
00611 {
00612 success_sum++;
00613 av_cost += cost;
00614 av_bb_cost += bb_cost;
00615 av_timing_cost += timing_cost;
00616 av_delay_cost += delay_cost;
00617 sum_of_squares += cost * cost;
00618 }
00619
00620 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
00621 || placer_opts.place_algorithm ==
00622 PATH_TIMING_DRIVEN_PLACE)
00623 {
00624
00625 if(inner_crit_iter_count >= inner_recompute_limit
00626 && inner_iter != move_lim - 1)
00627 {
00628
00629 inner_crit_iter_count = 0;
00630 #ifdef VERBOSE
00631 printf
00632 ("Inner Loop Recompute Criticalities\n");
00633 #endif
00634 if(placer_opts.place_algorithm ==
00635 NET_TIMING_DRIVEN_PLACE)
00636 {
00637 place_delay_value =
00638 delay_cost / num_connections;
00639 load_constant_net_delay(net_delay,
00640 place_delay_value);
00641 }
00642
00643 load_timing_graph_net_delays(net_delay);
00644 d_max = load_net_slack(net_slack, 0);
00645 load_criticalities(placer_opts, net_slack,
00646 d_max, crit_exponent);
00647 comp_td_costs(&timing_cost, &delay_cost);
00648 }
00649 inner_crit_iter_count++;
00650 }
00651 #ifdef VERBOSE
00652 printf
00653 ("t = %g cost = %g bb_cost = %g timing_cost = %g move = %d dmax = %g\n",
00654 t, cost, bb_cost, timing_cost, inner_iter, d_max);
00655 if(fabs
00656 (bb_cost -
00657 comp_bb_cost(CHECK, placer_opts.place_cost_type,
00658 placer_opts.num_regions)) >
00659 bb_cost * ERROR_TOL)
00660 exit(1);
00661 #endif
00662 }
00663
00664
00665
00666
00667
00668
00669 moves_since_cost_recompute += move_lim;
00670 if(moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE)
00671 {
00672 new_bb_cost =
00673 recompute_bb_cost(placer_opts.place_cost_type,
00674 placer_opts.num_regions);
00675 if(fabs(new_bb_cost - bb_cost) > bb_cost * ERROR_TOL)
00676 {
00677 printf
00678 ("Error in try_place: new_bb_cost = %g, old bb_cost = %g.\n",
00679 new_bb_cost, bb_cost);
00680 exit(1);
00681 }
00682 bb_cost = new_bb_cost;
00683
00684 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
00685 || placer_opts.place_algorithm ==
00686 PATH_TIMING_DRIVEN_PLACE)
00687 {
00688 comp_td_costs(&new_timing_cost, &new_delay_cost);
00689 if(fabs(new_timing_cost - timing_cost) >
00690 timing_cost * ERROR_TOL)
00691 {
00692 printf
00693 ("Error in try_place: new_timing_cost = %g, old timing_cost = %g.\n",
00694 new_timing_cost, timing_cost);
00695 exit(1);
00696 }
00697 if(fabs(new_delay_cost - delay_cost) >
00698 delay_cost * ERROR_TOL)
00699 {
00700 printf
00701 ("Error in try_place: new_delay_cost = %g, old delay_cost = %g.\n",
00702 new_delay_cost, delay_cost);
00703 exit(1);
00704 }
00705 timing_cost = new_timing_cost;
00706 }
00707
00708 if(placer_opts.place_algorithm == BOUNDING_BOX_PLACE)
00709 {
00710 cost = new_bb_cost;
00711 }
00712 moves_since_cost_recompute = 0;
00713 }
00714
00715 tot_iter += move_lim;
00716 success_rat = ((float)success_sum) / move_lim;
00717 if(success_sum == 0)
00718 {
00719 av_cost = cost;
00720 av_bb_cost = bb_cost;
00721 av_timing_cost = timing_cost;
00722 av_delay_cost = delay_cost;
00723 }
00724 else
00725 {
00726 av_cost /= success_sum;
00727 av_bb_cost /= success_sum;
00728 av_timing_cost /= success_sum;
00729 av_delay_cost /= success_sum;
00730 }
00731 std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);
00732
00733 #ifndef SPEC
00734 printf
00735 ("%11.5g %10.6g %11.6g %11.6g %11.6g %11.6g %11.4g %9.4g %8.3g %7.4g %7.4g %10d ",
00736 t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost,
00737 place_delay_value, d_max, success_rat, std_dev, rlim,
00738 crit_exponent, tot_iter);
00739 #endif
00740
00741 oldt = t;
00742 update_t(&t, std_dev, rlim, success_rat, annealing_sched);
00743
00744 #ifndef SPEC
00745 printf("%7.4g\n", t / oldt);
00746 #endif
00747
00748 sprintf(msg,
00749 "Cost: %g BB Cost %g TD Cost %g Temperature: %g d_max: %g",
00750 cost, bb_cost, timing_cost, t, d_max);
00751 update_screen(MINOR, msg, PLACEMENT, FALSE);
00752 update_rlim(&rlim, success_rat);
00753
00754 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00755 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
00756 {
00757 crit_exponent =
00758 (1 -
00759 (rlim -
00760 final_rlim) * inverse_delta_rlim) *
00761 (placer_opts.td_place_exp_last -
00762 placer_opts.td_place_exp_first) +
00763 placer_opts.td_place_exp_first;
00764 }
00765 #ifdef VERBOSE
00766 dump_clbs();
00767 #endif
00768 }
00769
00770 t = 0;
00771 av_cost = 0.;
00772 av_bb_cost = 0.;
00773 av_timing_cost = 0.;
00774 sum_of_squares = 0.;
00775 av_delay_cost = 0.;
00776 success_sum = 0;
00777
00778 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00779 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
00780 {
00781
00782
00783 if(outer_crit_iter_count >= placer_opts.recompute_crit_iter ||
00784 placer_opts.inner_loop_recompute_divider != 0)
00785 {
00786
00787 #ifdef VERBOSE
00788 printf("Outer Loop Recompute Criticalities\n");
00789 #endif
00790 place_delay_value = delay_cost / num_connections;
00791
00792 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE)
00793 load_constant_net_delay(net_delay, place_delay_value);
00794
00795 load_timing_graph_net_delays(net_delay);
00796 d_max = load_net_slack(net_slack, 0);
00797 load_criticalities(placer_opts, net_slack, d_max,
00798 crit_exponent);
00799
00800 comp_td_costs(&timing_cost, &delay_cost);
00801 outer_crit_iter_count = 0;
00802 }
00803 outer_crit_iter_count++;
00804
00805 inverse_prev_bb_cost = 1 / (bb_cost);
00806 inverse_prev_timing_cost = 1 / (timing_cost);
00807 }
00808
00809 inner_crit_iter_count = 1;
00810
00811 for(inner_iter = 0; inner_iter < move_lim; inner_iter++)
00812 {
00813 if(try_swap(t, &cost, &bb_cost, &timing_cost,
00814 rlim, placer_opts.place_cost_type,
00815 old_region_occ_x, old_region_occ_y,
00816 placer_opts.num_regions, fixed_pins,
00817 placer_opts.place_algorithm,
00818 placer_opts.timing_tradeoff, inverse_prev_bb_cost,
00819 inverse_prev_timing_cost, &delay_cost, x_lookup) == 1)
00820 {
00821 success_sum++;
00822 av_cost += cost;
00823 av_bb_cost += bb_cost;
00824 av_delay_cost += delay_cost;
00825 av_timing_cost += timing_cost;
00826 sum_of_squares += cost * cost;
00827
00828 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
00829 || placer_opts.place_algorithm ==
00830 PATH_TIMING_DRIVEN_PLACE)
00831 {
00832
00833 if(inner_crit_iter_count >= inner_recompute_limit
00834 && inner_iter != move_lim - 1)
00835 {
00836
00837 inner_crit_iter_count = 0;
00838 #ifdef VERBOSE
00839 printf
00840 ("Inner Loop Recompute Criticalities\n");
00841 #endif
00842 if(placer_opts.place_algorithm ==
00843 NET_TIMING_DRIVEN_PLACE)
00844 {
00845 place_delay_value =
00846 delay_cost / num_connections;
00847 load_constant_net_delay(net_delay,
00848 place_delay_value);
00849 }
00850
00851 load_timing_graph_net_delays(net_delay);
00852 d_max = load_net_slack(net_slack, 0);
00853 load_criticalities(placer_opts, net_slack,
00854 d_max, crit_exponent);
00855 comp_td_costs(&timing_cost, &delay_cost);
00856 }
00857 inner_crit_iter_count++;
00858 }
00859 }
00860 #ifdef VERBOSE
00861 printf("t = %g cost = %g move = %d\n", t, cost, tot_iter);
00862 #endif
00863 }
00864 tot_iter += move_lim;
00865 success_rat = ((float)success_sum) / move_lim;
00866 if(success_sum == 0)
00867 {
00868 av_cost = cost;
00869 av_bb_cost = bb_cost;
00870 av_delay_cost = delay_cost;
00871 av_timing_cost = timing_cost;
00872 }
00873 else
00874 {
00875 av_cost /= success_sum;
00876 av_bb_cost /= success_sum;
00877 av_delay_cost /= success_sum;
00878 av_timing_cost /= success_sum;
00879 }
00880
00881 std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);
00882
00883
00884 #ifndef SPEC
00885 printf
00886 ("%11.5g %10.6g %11.6g %11.6g %11.6g %11.6g %11.4g %9.4g %8.3g %7.4g %7.4g %10d \n\n",
00887 t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost,
00888 place_delay_value, d_max, success_rat, std_dev, rlim,
00889 crit_exponent, tot_iter);
00890
00891 #endif
00892
00893 #ifdef VERBOSE
00894 dump_clbs();
00895 #endif
00896
00897 check_place(bb_cost, timing_cost, placer_opts.place_cost_type,
00898 placer_opts.num_regions, placer_opts.place_algorithm,
00899 delay_cost);
00900
00901 if(placer_opts.enable_timing_computations &&
00902 placer_opts.place_algorithm == BOUNDING_BOX_PLACE)
00903 {
00904
00905
00906 for(inet = 0; inet < num_nets; inet++)
00907 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++)
00908 timing_place_crit[inet][ipin] = 0;
00909 comp_td_costs(&timing_cost, &delay_cost);
00910 }
00911
00912 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00913 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE ||
00914 placer_opts.enable_timing_computations)
00915 {
00916 net_delay = point_to_point_delay_cost;
00917
00918 load_timing_graph_net_delays(net_delay);
00919 est_crit = load_net_slack(net_slack, 0);
00920 #ifdef CREATE_ECHO_FILES
00921
00922 print_net_slack("placement_net_slacks.echo", net_slack);
00923 print_critical_path("placement_crit_path.echo",
00924 *subblock_data_ptr);
00925 #endif
00926 printf("Placement Estimated Crit Path Delay: %g\n\n", est_crit);
00927 }
00928
00929
00930 sprintf(msg,
00931 "Placement. Cost: %g bb_cost: %g td_cost: %g Channel Factor: %d d_max: %g",
00932 cost, bb_cost, timing_cost, width_fac, d_max);
00933 printf
00934 ("Placement. Cost: %g bb_cost: %g td_cost: %g delay_cost: %g.\n",
00935 cost, bb_cost, timing_cost, delay_cost);
00936 update_screen(MAJOR, msg, PLACEMENT, FALSE);
00937
00938 #ifdef SPEC
00939 printf("Total moves attempted: %d.0\n", tot_iter);
00940 #endif
00941
00942 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00943 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE ||
00944 placer_opts.enable_timing_computations)
00945 {
00946
00947 net_delay = remember_net_delay_original_ptr;
00948
00949 free_placement_structs(placer_opts.place_cost_type,
00950 placer_opts.num_regions, old_region_occ_x,
00951 old_region_occ_y, placer_opts);
00952 free_lookups_and_criticalities(&net_delay, &net_slack);
00953 }
00954
00955
00956
00957
00958 if(*mst)
00959 {
00960 for(inet = 0; inet < num_nets; inet++)
00961 {
00962 assert((*mst)[inet]);
00963 free((*mst)[inet]);
00964 }
00965 free(*mst);
00966 }
00967 *mst = (t_mst_edge **) my_malloc(sizeof(t_mst_edge *) * num_nets);
00968 for(inet = 0; inet < num_nets; inet++)
00969 {
00970 (*mst)[inet] = get_mst_of_net(inet);
00971 }
00972 free(x_lookup);
00973 }
00974
00975 static int
00976 count_connections()
00977 {
00978
00979
00980 int count, inet;
00981
00982 count = 0;
00983
00984 for(inet = 0; inet < num_nets; inet++)
00985 {
00986
00987 if(net[inet].is_global)
00988 continue;
00989
00990 count += net[inet].num_sinks;
00991 }
00992 return (count);
00993 }
00994
00995 static void
00996 compute_net_pin_index_values()
00997 {
00998
00999
01000
01001 int inet, netpin, blk, iblk, ipin;
01002 t_type_ptr type;
01003
01004
01005 for(iblk = 0; iblk < num_blocks; iblk++)
01006 {
01007 type = block[iblk].type;
01008 for(ipin = 0; ipin < type->num_pins; ipin++)
01009 {
01010 net_pin_index[iblk][ipin] = OPEN;
01011 }
01012 }
01013
01014 for(inet = 0; inet < num_nets; inet++)
01015 {
01016
01017 if(net[inet].is_global)
01018 continue;
01019
01020 for(netpin = 0; netpin <= net[inet].num_sinks; netpin++)
01021 {
01022 blk = net[inet].node_block[netpin];
01023 net_pin_index[blk][net[inet].node_block_pin[netpin]] =
01024 netpin;
01025 }
01026 }
01027 }
01028
01029
01030 static double
01031 get_std_dev(int n,
01032 double sum_x_squared,
01033 double av_x)
01034 {
01035
01036
01037
01038
01039
01040
01041 double std_dev;
01042
01043 if(n <= 1)
01044 std_dev = 0.;
01045 else
01046 std_dev = (sum_x_squared - n * av_x * av_x) / (double)(n - 1);
01047
01048 if(std_dev > 0.)
01049 std_dev = sqrt(std_dev);
01050 else
01051 std_dev = 0.;
01052
01053 return (std_dev);
01054 }
01055
01056
01057 static void
01058 update_rlim(float *rlim,
01059 float success_rat)
01060 {
01061
01062
01063
01064
01065 float upper_lim;
01066
01067 *rlim = (*rlim) * (1. - 0.44 + success_rat);
01068 upper_lim = max(nx, ny);
01069 *rlim = min(*rlim, upper_lim);
01070 *rlim = max(*rlim, 1.);
01071 }
01072
01073
01074
01075 static void
01076 update_t(float *t,
01077 float std_dev,
01078 float rlim,
01079 float success_rat,
01080 struct s_annealing_sched annealing_sched)
01081 {
01082
01083
01084
01085 if(annealing_sched.type == USER_SCHED)
01086 {
01087 *t = annealing_sched.alpha_t * (*t);
01088 }
01089
01090
01091
01092
01093
01094 #if 0
01095 else if(std_dev == 0.)
01096 {
01097 *t = 0.;
01098 }
01099 else
01100 {
01101 fac = exp(-LAMBDA * (*t) / std_dev);
01102 fac = max(0.5, fac);
01103 *t = (*t) * fac;
01104 }
01105 #endif
01106
01107
01108 else
01109 {
01110 if(success_rat > 0.96)
01111 {
01112 *t = (*t) * 0.5;
01113 }
01114 else if(success_rat > 0.8)
01115 {
01116 *t = (*t) * 0.9;
01117 }
01118 else if(success_rat > 0.15 || rlim > 1.)
01119 {
01120 *t = (*t) * 0.95;
01121 }
01122 else
01123 {
01124 *t = (*t) * 0.8;
01125 }
01126 }
01127 }
01128
01129
01130 static int
01131 exit_crit(float t,
01132 float cost,
01133 struct s_annealing_sched annealing_sched)
01134 {
01135
01136
01137
01138 if(annealing_sched.type == USER_SCHED)
01139 {
01140 if(t < annealing_sched.exit_t)
01141 {
01142 return (1);
01143 }
01144 else
01145 {
01146 return (0);
01147 }
01148 }
01149
01150
01151
01152 if(t < 0.005 * cost / num_nets)
01153 {
01154 return (1);
01155 }
01156 else
01157 {
01158 return (0);
01159 }
01160 }
01161
01162
01163 static float
01164 starting_t(float *cost_ptr,
01165 float *bb_cost_ptr,
01166 float *timing_cost_ptr,
01167 int place_cost_type,
01168 float **old_region_occ_x,
01169 float **old_region_occ_y,
01170 int num_regions,
01171 boolean fixed_pins,
01172 struct s_annealing_sched annealing_sched,
01173 int max_moves,
01174 float rlim,
01175 enum e_place_algorithm place_algorithm,
01176 float timing_tradeoff,
01177 float inverse_prev_bb_cost,
01178 float inverse_prev_timing_cost,
01179 float *delay_cost_ptr)
01180 {
01181
01182
01183
01184 int i, num_accepted, move_lim;
01185 double std_dev, av, sum_of_squares;
01186 int *x_lookup;
01187
01188 x_lookup = (int *)my_malloc(nx * sizeof(int));
01189
01190 if(annealing_sched.type == USER_SCHED)
01191 return (annealing_sched.init_t);
01192
01193 move_lim = min(max_moves, num_blocks);
01194
01195 num_accepted = 0;
01196 av = 0.;
01197 sum_of_squares = 0.;
01198
01199
01200
01201 for(i = 0; i < move_lim; i++)
01202 {
01203 if(try_swap(1.e30, cost_ptr, bb_cost_ptr, timing_cost_ptr, rlim,
01204 place_cost_type,
01205 old_region_occ_x, old_region_occ_y, num_regions,
01206 fixed_pins, place_algorithm, timing_tradeoff,
01207 inverse_prev_bb_cost, inverse_prev_timing_cost,
01208 delay_cost_ptr, x_lookup) == 1)
01209 {
01210 num_accepted++;
01211 av += *cost_ptr;
01212 sum_of_squares += *cost_ptr * (*cost_ptr);
01213 }
01214 }
01215
01216 if(num_accepted != 0)
01217 av /= num_accepted;
01218 else
01219 av = 0.;
01220
01221 std_dev = get_std_dev(num_accepted, sum_of_squares, av);
01222
01223 #ifdef DEBUG
01224 if(num_accepted != move_lim)
01225 {
01226 printf
01227 ("Warning: Starting t: %d of %d configurations accepted.\n",
01228 num_accepted, move_lim);
01229 }
01230 #endif
01231
01232 #ifdef VERBOSE
01233 printf("std_dev: %g, average cost: %g, starting temp: %g\n",
01234 std_dev, av, 20. * std_dev);
01235 #endif
01236
01237 free(x_lookup);
01238
01239
01240
01241 return (20. * std_dev);
01242 }
01243
01244
01245 static int
01246 try_swap(float t,
01247 float *cost,
01248 float *bb_cost,
01249 float *timing_cost,
01250 float rlim,
01251 int place_cost_type,
01252 float **old_region_occ_x,
01253 float **old_region_occ_y,
01254 int num_regions,
01255 boolean fixed_pins,
01256 enum e_place_algorithm place_algorithm,
01257 float timing_tradeoff,
01258 float inverse_prev_bb_cost,
01259 float inverse_prev_timing_cost,
01260 float *delay_cost,
01261 int *x_lookup)
01262 {
01263
01264
01265
01266
01267
01268
01269
01270 int b_from, x_to, y_to, z_to, x_from, y_from, z_from, b_to;
01271 int i, k, inet, keep_switch, num_of_pins, max_pins_per_fb;
01272 int num_nets_affected, bb_index;
01273 float delta_c, bb_delta_c, timing_delta_c, delay_delta_c, newcost;
01274 static struct s_bb *bb_coord_new = NULL;
01275 static struct s_bb *bb_edge_new = NULL;
01276 static int *nets_to_update = NULL, *net_block_moved = NULL;
01277
01278 max_pins_per_fb = 0;
01279 for(i = 0; i < num_types; i++)
01280 {
01281 max_pins_per_fb =
01282 max(max_pins_per_fb, type_descriptors[i].num_pins);
01283 }
01284
01285
01286
01287 if(bb_coord_new == NULL)
01288 {
01289 bb_coord_new = (struct s_bb *)my_malloc(2 * max_pins_per_fb *
01290 sizeof(struct s_bb));
01291 bb_edge_new = (struct s_bb *)my_malloc(2 * max_pins_per_fb *
01292 sizeof(struct s_bb));
01293 nets_to_update =
01294 (int *)my_malloc(2 * max_pins_per_fb * sizeof(int));
01295 net_block_moved =
01296 (int *)my_malloc(2 * max_pins_per_fb * sizeof(int));
01297 }
01298
01299
01300 delay_delta_c = 0.0;
01301 b_from = my_irand(num_blocks - 1);
01302
01303
01304
01305
01306
01307
01308
01309
01310 if(fixed_pins == TRUE)
01311 {
01312 while(block[b_from].type == IO_TYPE)
01313 {
01314 b_from = my_irand(num_blocks - 1);
01315 }
01316 }
01317
01318 x_from = block[b_from].x;
01319 y_from = block[b_from].y;
01320 z_from = block[b_from].z;
01321
01322 if(!find_to
01323 (x_from, y_from, block[b_from].type, rlim, x_lookup, &x_to, &y_to))
01324 return FALSE;
01325
01326
01327
01328
01329
01330
01331 z_to = 0;
01332 if(grid[x_to][y_to].type->capacity > 1)
01333 {
01334 z_to = my_irand(grid[x_to][y_to].type->capacity - 1);
01335 }
01336 if(grid[x_to][y_to].blocks[z_to] == EMPTY)
01337 {
01338 b_to = EMPTY;
01339 block[b_from].x = x_to;
01340 block[b_from].y = y_to;
01341 block[b_from].z = z_to;
01342 }
01343 else
01344 {
01345 b_to = grid[x_to][y_to].blocks[z_to];
01346 block[b_to].x = x_from;
01347 block[b_to].y = y_from;
01348 block[b_to].z = z_from;
01349
01350 block[b_from].x = x_to;
01351 block[b_from].y = y_to;
01352 block[b_from].z = z_to;
01353 }
01354
01355
01356
01357
01358
01359
01360
01361 delta_c = 0;
01362 bb_delta_c = 0;
01363 timing_delta_c = 0;
01364
01365 num_of_pins = block[b_from].type->num_pins;
01366
01367 num_nets_affected = find_affected_nets(nets_to_update, net_block_moved,
01368 b_from, b_to, num_of_pins);
01369
01370 if(place_cost_type == NONLINEAR_CONG)
01371 {
01372 save_region_occ(old_region_occ_x, old_region_occ_y, num_regions);
01373 }
01374
01375 bb_index = 0;
01376
01377 for(k = 0; k < num_nets_affected; k++)
01378 {
01379 inet = nets_to_update[k];
01380
01381
01382
01383
01384 if(net_block_moved[k] == FROM_AND_TO)
01385 continue;
01386
01387 if(net[inet].num_sinks < SMALL_NET)
01388 {
01389 get_non_updateable_bb(inet, &bb_coord_new[bb_index]);
01390 }
01391 else
01392 {
01393 if(net_block_moved[k] == FROM)
01394 update_bb(inet, &bb_coord_new[bb_index],
01395 &bb_edge_new[bb_index], x_from, y_from,
01396 x_to, y_to);
01397 else
01398 update_bb(inet, &bb_coord_new[bb_index],
01399 &bb_edge_new[bb_index], x_to, y_to, x_from,
01400 y_from);
01401 }
01402
01403 if(place_cost_type != NONLINEAR_CONG)
01404 {
01405 temp_net_cost[inet] =
01406 get_net_cost(inet, &bb_coord_new[bb_index]);
01407 bb_delta_c += temp_net_cost[inet] - net_cost[inet];
01408 }
01409 else
01410 {
01411
01412 update_region_occ(inet, &bb_coords[inet], -1,
01413 num_regions);
01414 update_region_occ(inet, &bb_coord_new[bb_index], 1,
01415 num_regions);
01416 }
01417
01418 bb_index++;
01419 }
01420
01421 if(place_cost_type == NONLINEAR_CONG)
01422 {
01423 newcost = nonlinear_cong_cost(num_regions);
01424 bb_delta_c = newcost - *bb_cost;
01425 }
01426
01427
01428 if(place_algorithm == NET_TIMING_DRIVEN_PLACE ||
01429 place_algorithm == PATH_TIMING_DRIVEN_PLACE)
01430 {
01431
01432
01433
01434
01435 comp_delta_td_cost(b_from, b_to, num_of_pins, &timing_delta_c,
01436 &delay_delta_c);
01437
01438 delta_c =
01439 (1 - timing_tradeoff) * bb_delta_c * inverse_prev_bb_cost +
01440 timing_tradeoff * timing_delta_c * inverse_prev_timing_cost;
01441 }
01442 else
01443 {
01444 delta_c = bb_delta_c;
01445 }
01446
01447
01448 keep_switch = assess_swap(delta_c, t);
01449
01450
01451
01452 if(keep_switch)
01453 {
01454 *cost = *cost + delta_c;
01455 *bb_cost = *bb_cost + bb_delta_c;
01456
01457
01458 if(place_algorithm == NET_TIMING_DRIVEN_PLACE ||
01459 place_algorithm == PATH_TIMING_DRIVEN_PLACE)
01460 {
01461
01462
01463 *timing_cost = *timing_cost + timing_delta_c;
01464 *delay_cost = *delay_cost + delay_delta_c;
01465
01466 update_td_cost(b_from, b_to, num_of_pins);
01467 }
01468
01469
01470
01471 bb_index = 0;
01472
01473 for(k = 0; k < num_nets_affected; k++)
01474 {
01475 inet = nets_to_update[k];
01476
01477
01478
01479
01480 if(net_block_moved[k] == FROM_AND_TO)
01481 {
01482 temp_net_cost[inet] = -1;
01483 continue;
01484 }
01485
01486 bb_coords[inet] = bb_coord_new[bb_index];
01487 if(net[inet].num_sinks >= SMALL_NET)
01488 bb_num_on_edges[inet] = bb_edge_new[bb_index];
01489
01490 bb_index++;
01491
01492 net_cost[inet] = temp_net_cost[inet];
01493 temp_net_cost[inet] = -1;
01494 }
01495
01496
01497
01498 grid[x_to][y_to].blocks[z_to] = b_from;
01499 grid[x_from][y_from].blocks[z_from] = b_to;
01500
01501 if(EMPTY == b_to)
01502 {
01503 grid[x_to][y_to].usage++;
01504 grid[x_from][y_from].usage--;
01505 }
01506 }
01507
01508 else
01509 {
01510
01511
01512 for(k = 0; k < num_nets_affected; k++)
01513 {
01514 inet = nets_to_update[k];
01515 temp_net_cost[inet] = -1;
01516 }
01517
01518
01519 block[b_from].x = x_from;
01520 block[b_from].y = y_from;
01521 block[b_from].z = z_from;
01522 if(b_to != EMPTY)
01523 {
01524 block[b_to].x = x_to;
01525 block[b_to].y = y_to;
01526 block[b_to].z = z_to;
01527 }
01528
01529
01530 if(place_cost_type == NONLINEAR_CONG)
01531 {
01532 restore_region_occ(old_region_occ_x, old_region_occ_y,
01533 num_regions);
01534 }
01535 }
01536
01537 return (keep_switch);
01538 }
01539
01540
01541 static void
01542 save_region_occ(float **old_region_occ_x,
01543 float **old_region_occ_y,
01544 int num_regions)
01545 {
01546
01547
01548
01549
01550 int i, j;
01551
01552 for(i = 0; i < num_regions; i++)
01553 {
01554 for(j = 0; j < num_regions; j++)
01555 {
01556 old_region_occ_x[i][j] = place_region_x[i][j].occupancy;
01557 old_region_occ_y[i][j] = place_region_y[i][j].occupancy;
01558 }
01559 }
01560 }
01561
01562
01563 static void
01564 restore_region_occ(float **old_region_occ_x,
01565 float **old_region_occ_y,
01566 int num_regions)
01567 {
01568
01569
01570
01571
01572 int i, j;
01573
01574 for(i = 0; i < num_regions; i++)
01575 {
01576 for(j = 0; j < num_regions; j++)
01577 {
01578 place_region_x[i][j].occupancy = old_region_occ_x[i][j];
01579 place_region_y[i][j].occupancy = old_region_occ_y[i][j];
01580 }
01581 }
01582 }
01583
01584
01585 static int
01586 find_affected_nets(int *nets_to_update,
01587 int *net_block_moved,
01588 int b_from,
01589 int b_to,
01590 int num_of_pins)
01591 {
01592
01593
01594
01595
01596
01597
01598 int k, inet, affected_index, count;
01599
01600 affected_index = 0;
01601
01602 for(k = 0; k < num_of_pins; k++)
01603 {
01604 inet = block[b_from].nets[k];
01605
01606 if(inet == OPEN)
01607 continue;
01608
01609 if(net[inet].is_global)
01610 continue;
01611
01612
01613
01614 if(temp_net_cost[inet] > 0.)
01615 continue;
01616
01617 nets_to_update[affected_index] = inet;
01618 net_block_moved[affected_index] = FROM;
01619 affected_index++;
01620 temp_net_cost[inet] = 1.;
01621 }
01622
01623 if(b_to != EMPTY)
01624 {
01625 for(k = 0; k < num_of_pins; k++)
01626 {
01627 inet = block[b_to].nets[k];
01628
01629 if(inet == OPEN)
01630 continue;
01631
01632 if(net[inet].is_global)
01633 continue;
01634
01635 if(temp_net_cost[inet] > 0.)
01636 {
01637 for(count = 0; count < affected_index; count++)
01638 {
01639 if(nets_to_update[count] == inet)
01640 {
01641 if(net_block_moved[count] == FROM)
01642 net_block_moved[count] =
01643 FROM_AND_TO;
01644 break;
01645 }
01646 }
01647
01648 #ifdef DEBUG
01649 if(count > affected_index)
01650 {
01651 printf
01652 ("Error in find_affected_nets -- count = %d,"
01653 " affected index = %d.\n", count,
01654 affected_index);
01655 exit(1);
01656 }
01657 #endif
01658 }
01659
01660 else
01661 {
01662
01663 nets_to_update[affected_index] = inet;
01664 net_block_moved[affected_index] = TO;
01665 affected_index++;
01666 temp_net_cost[inet] = 1.;
01667 }
01668 }
01669 }
01670
01671 return (affected_index);
01672 }
01673
01674
01675 static boolean
01676 find_to(int x_from,
01677 int y_from,
01678 t_type_ptr type,
01679 float rlim,
01680 int *x_lookup,
01681 int *x_to,
01682 int *y_to)
01683 {
01684
01685
01686
01687
01688
01689
01690 int x_rel, y_rel, iside, iplace, rlx, rly, min_x, max_x, min_y, max_y;
01691 int num_col_same_type, i, j;
01692
01693 rlx = min(nx, rlim);
01694 rly = min(ny, rlim);
01695
01696 min_x = max(1, x_from - rlx);
01697 max_x = min(nx, x_from + rlx);
01698 min_y = max(1, y_from - rly);
01699 max_y = min(ny, y_from + rly);
01700
01701 num_col_same_type = 0;
01702 j = 0;
01703 if(type != IO_TYPE)
01704 {
01705 for(i = min_x; i <= max_x; i++)
01706 {
01707 if(grid[i][1].type == type)
01708 {
01709 num_col_same_type++;
01710 x_lookup[j] = i;
01711 j++;
01712 }
01713 }
01714 assert(num_col_same_type != 0);
01715 if(num_col_same_type == 1 &&
01716 ((((max_y - min_y) / type->height) - 1) <= 0
01717 || type->height > (ny / 2)))
01718 return FALSE;
01719 }
01720
01721 #ifdef DEBUG
01722 if(rlx < 1 || rlx > nx)
01723 {
01724 printf("Error in find_to: rlx = %d\n", rlx);
01725 exit(1);
01726 }
01727 #endif
01728
01729 do
01730 {
01731 if(type == IO_TYPE)
01732 {
01733 if(rlx >= nx)
01734 {
01735 iside = my_irand(3);
01736
01737
01738
01739
01740
01741
01742
01743
01744
01745 switch (iside)
01746 {
01747 case 0:
01748 iplace = my_irand(ny - 1) + 1;
01749 *x_to = 0;
01750 *y_to = iplace;
01751 break;
01752 case 1:
01753 iplace = my_irand(nx - 1) + 1;
01754 *x_to = iplace;
01755 *y_to = ny + 1;
01756 break;
01757 case 2:
01758 iplace = my_irand(ny - 1) + 1;
01759 *x_to = nx + 1;
01760 *y_to = iplace;
01761 break;
01762 case 3:
01763 iplace = my_irand(nx - 1) + 1;
01764 *x_to = iplace;
01765 *y_to = 0;
01766 break;
01767 default:
01768 printf
01769 ("Error in find_to. Unexpected io swap location.\n");
01770 exit(1);
01771 }
01772 }
01773 else
01774 {
01775 if(x_from == 0)
01776 {
01777 iplace = my_irand(2 * rly);
01778 *y_to = y_from - rly + iplace;
01779 *x_to = x_from;
01780 if(*y_to > ny)
01781 {
01782 *y_to = ny + 1;
01783 *x_to = my_irand(rlx - 1) + 1;
01784 }
01785 else if(*y_to < 1)
01786 {
01787 *y_to = 0;
01788 *x_to = my_irand(rlx - 1) + 1;
01789 }
01790 }
01791 else if(x_from == nx + 1)
01792 {
01793 iplace = my_irand(2 * rly);
01794 *y_to = y_from - rly + iplace;
01795 *x_to = x_from;
01796 if(*y_to > ny)
01797 {
01798 *y_to = ny + 1;
01799 *x_to = nx - my_irand(rlx - 1);
01800 }
01801 else if(*y_to < 1)
01802 {
01803 *y_to = 0;
01804 *x_to = nx - my_irand(rlx - 1);
01805 }
01806 }
01807 else if(y_from == 0)
01808 {
01809 iplace = my_irand(2 * rlx);
01810 *x_to = x_from - rlx + iplace;
01811 *y_to = y_from;
01812 if(*x_to > nx)
01813 {
01814 *x_to = nx + 1;
01815 *y_to = my_irand(rly - 1) + 1;
01816 }
01817 else if(*x_to < 1)
01818 {
01819 *x_to = 0;
01820 *y_to = my_irand(rly - 1) + 1;
01821 }
01822 }
01823 else
01824 {
01825 iplace = my_irand(2 * rlx);
01826 *x_to = x_from - rlx + iplace;
01827 *y_to = y_from;
01828 if(*x_to > nx)
01829 {
01830 *x_to = nx + 1;
01831 *y_to = ny - my_irand(rly - 1);
01832 }
01833 else if(*x_to < 1)
01834 {
01835 *x_to = 0;
01836 *y_to = ny - my_irand(rly - 1);
01837 }
01838 }
01839 }
01840 }
01841 else
01842 {
01843 x_rel = my_irand(num_col_same_type - 1);
01844 y_rel =
01845 my_irand(max
01846 (0, ((max_y - min_y) / type->height) - 1));
01847 *x_to = x_lookup[x_rel];
01848 *y_to = min_y + y_rel * type->height;
01849 *y_to = (*y_to) - grid[*x_to][*y_to].offset;
01850 assert(*x_to >= 1 && *x_to <= nx);
01851 assert(*y_to >= 1 && *y_to <= ny);
01852 }
01853 }
01854 while((x_from == *x_to) && (y_from == *y_to));
01855
01856 #ifdef DEBUG
01857 if(*x_to < 0 || *x_to > nx + 1 || *y_to < 0 || *y_to > ny + 1)
01858 {
01859 printf("Error in routine find_to: (x_to,y_to) = (%d,%d)\n",
01860 *x_to, *y_to);
01861 exit(1);
01862 }
01863 #endif
01864 assert(type == grid[*x_to][*y_to].type);
01865 return TRUE;
01866 }
01867
01868
01869 static int
01870 assess_swap(float delta_c,
01871 float t)
01872 {
01873
01874
01875
01876 int accept;
01877 float prob_fac, fnum;
01878
01879 if(delta_c <= 0)
01880 {
01881
01882 #ifdef SPEC
01883 fnum = my_frand();
01884 #endif
01885
01886 accept = 1;
01887 return (accept);
01888 }
01889
01890 if(t == 0.)
01891 return (0);
01892
01893 fnum = my_frand();
01894 prob_fac = exp(-delta_c / t);
01895 if(prob_fac > fnum)
01896 {
01897 accept = 1;
01898 }
01899 else
01900 {
01901 accept = 0;
01902 }
01903 return (accept);
01904 }
01905
01906
01907 static float
01908 recompute_bb_cost(int place_cost_type,
01909 int num_regions)
01910 {
01911
01912
01913
01914
01915
01916 int i, j, inet;
01917 float cost;
01918
01919 cost = 0;
01920
01921
01922
01923 if(place_cost_type == NONLINEAR_CONG)
01924 {
01925 for(i = 0; i < num_regions; i++)
01926 {
01927 for(j = 0; j < num_regions; j++)
01928 {
01929 place_region_x[i][j].occupancy = 0.;
01930 place_region_y[i][j].occupancy = 0.;
01931 }
01932 }
01933 }
01934
01935 for(inet = 0; inet < num_nets; inet++)
01936 {
01937
01938 if(net[inet].is_global == FALSE)
01939 {
01940
01941
01942
01943 if(place_cost_type != NONLINEAR_CONG)
01944 {
01945 cost += net_cost[inet];
01946 }
01947 else
01948 {
01949 update_region_occ(inet, &bb_coords[inet], 1,
01950 num_regions);
01951 }
01952 }
01953 }
01954
01955 if(place_cost_type == NONLINEAR_CONG)
01956 {
01957 cost = nonlinear_cong_cost(num_regions);
01958 }
01959
01960 return (cost);
01961 }
01962
01963
01964 static float
01965 comp_td_point_to_point_delay(int inet,
01966 int ipin)
01967 {
01968
01969
01970
01971 int source_block, sink_block;
01972 int delta_x, delta_y;
01973 t_type_ptr source_type, sink_type;
01974 float delay_source_to_sink;
01975
01976 delay_source_to_sink = 0.;
01977
01978 source_block = net[inet].node_block[0];
01979 source_type = block[source_block].type;
01980
01981 sink_block = net[inet].node_block[ipin];
01982 sink_type = block[sink_block].type;
01983
01984 assert(source_type != NULL);
01985 assert(sink_type != NULL);
01986
01987 delta_x = abs(block[sink_block].x - block[source_block].x);
01988 delta_y = abs(block[sink_block].y - block[source_block].y);
01989
01990
01991
01992
01993
01994 if(source_type == IO_TYPE)
01995 {
01996 if(sink_type == IO_TYPE)
01997 delay_source_to_sink = delta_io_to_io[delta_x][delta_y];
01998 else
01999 delay_source_to_sink = delta_io_to_fb[delta_x][delta_y];
02000 }
02001 else
02002 {
02003 if(sink_type == IO_TYPE)
02004 delay_source_to_sink = delta_fb_to_io[delta_x][delta_y];
02005 else
02006 delay_source_to_sink = delta_fb_to_fb[delta_x][delta_y];
02007 }
02008 if(delay_source_to_sink < 0)
02009 {
02010 printf
02011 ("Error in comp_td_point_to_point_delay in place.c, bad delay_source_to_sink value\n");
02012 exit(1);
02013 }
02014
02015 if(delay_source_to_sink < 0.)
02016 {
02017 printf
02018 ("Error in comp_td_point_to_point_delay in place.c, delay is less than 0\n");
02019 exit(1);
02020 }
02021
02022 return (delay_source_to_sink);
02023 }
02024
02025
02026
02027 static void
02028 update_td_cost(int b_from,
02029 int b_to,
02030 int num_of_pins)
02031 {
02032
02033
02034
02035 int blkpin, net_pin, inet, ipin;
02036
02037 for(blkpin = 0; blkpin < num_of_pins; blkpin++)
02038 {
02039
02040 inet = block[b_from].nets[blkpin];
02041
02042 if(inet == OPEN)
02043 continue;
02044
02045 if(net[inet].is_global)
02046 continue;
02047
02048 net_pin = net_pin_index[b_from][blkpin];
02049
02050 if(net_pin != 0)
02051 {
02052
02053
02054 if(net[inet].node_block[0] != b_to
02055 && net[inet].node_block[0] != b_from)
02056 {
02057
02058 point_to_point_delay_cost[inet][net_pin] =
02059 temp_point_to_point_delay_cost[inet][net_pin];
02060 temp_point_to_point_delay_cost[inet][net_pin] =
02061 -1;
02062
02063 point_to_point_timing_cost[inet][net_pin] =
02064 temp_point_to_point_timing_cost[inet]
02065 [net_pin];
02066 temp_point_to_point_timing_cost[inet][net_pin] =
02067 -1;
02068 }
02069 }
02070 else
02071 {
02072
02073 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++)
02074 {
02075
02076 point_to_point_delay_cost[inet][ipin] =
02077 temp_point_to_point_delay_cost[inet][ipin];
02078 temp_point_to_point_delay_cost[inet][ipin] = -1;
02079
02080 point_to_point_timing_cost[inet][ipin] =
02081 temp_point_to_point_timing_cost[inet][ipin];
02082 temp_point_to_point_timing_cost[inet][ipin] = -1;
02083 }
02084 }
02085 }
02086
02087 if(b_to != EMPTY)
02088 {
02089 for(blkpin = 0; blkpin < num_of_pins; blkpin++)
02090 {
02091
02092 inet = block[b_to].nets[blkpin];
02093
02094 if(inet == OPEN)
02095 continue;
02096
02097 if(net[inet].is_global)
02098 continue;
02099
02100 net_pin = net_pin_index[b_to][blkpin];
02101
02102 if(net_pin != 0)
02103 {
02104
02105
02106 if(net[inet].node_block[0] != b_to
02107 && net[inet].node_block[0] != b_from)
02108 {
02109
02110 point_to_point_delay_cost[inet][net_pin] =
02111 temp_point_to_point_delay_cost[inet]
02112 [net_pin];
02113 temp_point_to_point_delay_cost[inet]
02114 [net_pin] = -1;
02115
02116 point_to_point_timing_cost[inet][net_pin]
02117 =
02118 temp_point_to_point_timing_cost[inet]
02119 [net_pin];
02120 temp_point_to_point_timing_cost[inet]
02121 [net_pin] = -1;
02122 }
02123 }
02124 else
02125 {
02126
02127 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++)
02128 {
02129
02130 point_to_point_delay_cost[inet][ipin] =
02131 temp_point_to_point_delay_cost[inet]
02132 [ipin];
02133 temp_point_to_point_delay_cost[inet][ipin]
02134 = -1;
02135
02136 point_to_point_timing_cost[inet][ipin] =
02137 temp_point_to_point_timing_cost[inet]
02138 [ipin];
02139 temp_point_to_point_timing_cost[inet]
02140 [ipin] = -1;
02141 }
02142 }
02143 }
02144 }
02145 }
02146
02147
02148 static void
02149 comp_delta_td_cost(int b_from,
02150 int b_to,
02151 int num_of_pins,
02152 float *delta_timing,
02153 float *delta_delay)
02154 {
02155
02156
02157
02158
02159
02160
02161 int inet, k, net_pin, ipin;
02162 float delta_timing_cost, delta_delay_cost, temp_delay;
02163
02164 delta_timing_cost = 0.;
02165 delta_delay_cost = 0.;
02166
02167
02168 for(k = 0; k < num_of_pins; k++)
02169 {
02170 inet = block[b_from].nets[k];
02171
02172 if(inet == OPEN)
02173 continue;
02174
02175 if(net[inet].is_global)
02176 continue;
02177
02178 net_pin = net_pin_index[b_from][k];
02179
02180 if(net_pin != 0)
02181 {
02182
02183
02184
02185
02186
02187
02188 if(net[inet].node_block[0] != b_to
02189 && net[inet].node_block[0] != b_from)
02190 {
02191 temp_delay =
02192 comp_td_point_to_point_delay(inet, net_pin);
02193
02194 temp_point_to_point_delay_cost[inet][net_pin] =
02195 temp_delay;
02196 temp_point_to_point_timing_cost[inet][net_pin] =
02197 timing_place_crit[inet][net_pin] * temp_delay;
02198
02199 delta_delay_cost +=
02200 temp_point_to_point_delay_cost[inet][net_pin]
02201 - point_to_point_delay_cost[inet][net_pin];
02202
02203 delta_timing_cost +=
02204 temp_point_to_point_timing_cost[inet][net_pin]
02205 - point_to_point_timing_cost[inet][net_pin];
02206 }
02207 }
02208 else
02209 {
02210
02211 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++)
02212 {
02213 temp_delay =
02214 comp_td_point_to_point_delay(inet, ipin);
02215
02216 temp_point_to_point_delay_cost[inet][ipin] =
02217 temp_delay;
02218 temp_point_to_point_timing_cost[inet][ipin] =
02219 timing_place_crit[inet][ipin] * temp_delay;
02220
02221 delta_delay_cost +=
02222 temp_point_to_point_delay_cost[inet][ipin] -
02223 point_to_point_delay_cost[inet][ipin];
02224
02225 delta_timing_cost +=
02226 temp_point_to_point_timing_cost[inet][ipin] -
02227 point_to_point_timing_cost[inet][ipin];
02228 }
02229 }
02230 }
02231
02232 if(b_to != EMPTY)
02233 {
02234 for(k = 0; k < num_of_pins; k++)
02235 {
02236 inet = block[b_to].nets[k];
02237
02238 if(inet == OPEN)
02239 continue;
02240
02241 if(net[inet].is_global)
02242 continue;
02243
02244 net_pin = net_pin_index[b_to][k];
02245
02246 if(net_pin != 0)
02247 {
02248
02249
02250
02251
02252
02253
02254 if(net[inet].node_block[0] != b_to
02255 && net[inet].node_block[0] != b_from)
02256 {
02257 temp_delay =
02258 comp_td_point_to_point_delay(inet,
02259 net_pin);
02260
02261 temp_point_to_point_delay_cost[inet]
02262 [net_pin] = temp_delay;
02263 temp_point_to_point_timing_cost[inet]
02264 [net_pin] =
02265 timing_place_crit[inet][net_pin] *
02266 temp_delay;
02267
02268 delta_delay_cost +=
02269 temp_point_to_point_delay_cost[inet]
02270 [net_pin] -
02271 point_to_point_delay_cost[inet]
02272 [net_pin];
02273 delta_timing_cost +=
02274 temp_point_to_point_timing_cost[inet]
02275 [net_pin] -
02276 point_to_point_timing_cost[inet]
02277 [net_pin];
02278 }
02279 }
02280 else
02281 {
02282
02283 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++)
02284 {
02285
02286 temp_delay =
02287 comp_td_point_to_point_delay(inet,
02288 ipin);
02289
02290 temp_point_to_point_delay_cost[inet][ipin]
02291 = temp_delay;
02292 temp_point_to_point_timing_cost[inet]
02293 [ipin] =
02294 timing_place_crit[inet][ipin] *
02295 temp_delay;
02296
02297
02298 delta_delay_cost +=
02299 temp_point_to_point_delay_cost[inet]
02300 [ipin] -
02301 point_to_point_delay_cost[inet][ipin];
02302 delta_timing_cost +=
02303 temp_point_to_point_timing_cost[inet]
02304 [ipin] -
02305 point_to_point_timing_cost[inet]
02306 [ipin];
02307 }
02308 }
02309 }
02310 }
02311
02312 *delta_timing = delta_timing_cost;
02313 *delta_delay = delta_delay_cost;
02314 }
02315
02316 static void
02317 comp_td_costs(float *timing_cost,
02318 float *connection_delay_sum)
02319 {
02320
02321
02322
02323
02324 int inet, ipin;
02325 float loc_timing_cost, loc_connection_delay_sum, temp_delay_cost,
02326 temp_timing_cost;
02327
02328 loc_timing_cost = 0.;
02329 loc_connection_delay_sum = 0.;
02330
02331 for(inet = 0; inet < num_nets; inet++)
02332 {
02333 if(net[inet].is_global == FALSE)
02334 {
02335
02336 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++)
02337 {
02338
02339 temp_delay_cost =
02340 comp_td_point_to_point_delay(inet, ipin);
02341 temp_timing_cost =
02342 temp_delay_cost *
02343 timing_place_crit[inet][ipin];
02344
02345 loc_connection_delay_sum += temp_delay_cost;
02346 point_to_point_delay_cost[inet][ipin] =
02347 temp_delay_cost;
02348 temp_point_to_point_delay_cost[inet][ipin] = -1;
02349
02350 point_to_point_timing_cost[inet][ipin] =
02351 temp_timing_cost;
02352 temp_point_to_point_timing_cost[inet][ipin] = -1;
02353 loc_timing_cost += temp_timing_cost;
02354 }
02355 }
02356 }
02357 *timing_cost = loc_timing_cost;
02358 *connection_delay_sum = loc_connection_delay_sum;
02359 }
02360
02361
02362 static float
02363 comp_bb_cost(int method,
02364 int place_cost_type,
02365 int num_regions)
02366 {
02367
02368
02369
02370
02371
02372
02373
02374
02375
02376
02377 int i, j, k;
02378 float cost;
02379 double expected_wirelength;
02380
02381 cost = 0;
02382 expected_wirelength = 0.0;
02383
02384
02385
02386 if(place_cost_type == NONLINEAR_CONG)
02387 {
02388 for(i = 0; i < num_regions; i++)
02389 {
02390 for(j = 0; j < num_regions; j++)
02391 {
02392 place_region_x[i][j].occupancy = 0.;
02393 place_region_y[i][j].occupancy = 0.;
02394 }
02395 }
02396 }
02397
02398 for(k = 0; k < num_nets; k++)
02399 {
02400
02401 if(net[k].is_global == FALSE)
02402 {
02403
02404
02405
02406
02407 if(net[k].num_sinks >= SMALL_NET && method == NORMAL)
02408 {
02409 get_bb_from_scratch(k, &bb_coords[k],
02410 &bb_num_on_edges[k]);
02411 }
02412 else
02413 {
02414 get_non_updateable_bb(k, &bb_coords[k]);
02415 }
02416
02417 if(place_cost_type != NONLINEAR_CONG)
02418 {
02419 net_cost[k] = get_net_cost(k, &bb_coords[k]);
02420 cost += net_cost[k];
02421 if(method == CHECK)
02422 expected_wirelength +=
02423 get_net_wirelength_estimate(k,
02424 &bb_coords
02425 [k]);
02426 }
02427 else
02428 {
02429 update_region_occ(k, &bb_coords[k], 1,
02430 num_regions);
02431 }
02432 }
02433 }
02434
02435 if(place_cost_type == NONLINEAR_CONG)
02436 {
02437 cost = nonlinear_cong_cost(num_regions);
02438 }
02439
02440 if(method == CHECK)
02441 printf("BB estimate of min-dist (placement) wirelength is ;%.0f\n",
02442 expected_wirelength);
02443
02444 return (cost);
02445 }
02446
02447
02448 static float
02449 nonlinear_cong_cost(int num_regions)
02450 {
02451
02452
02453
02454
02455
02456
02457
02458
02459
02460
02461 float cost, tmp;
02462 int i, j;
02463
02464 cost = 0.;
02465
02466 for(i = 0; i < num_regions; i++)
02467 {
02468 for(j = 0; j < num_regions; j++)
02469 {
02470
02471
02472
02473 if(place_region_x[i][j].occupancy <
02474 place_region_x[i][j].capacity)
02475 {
02476 cost += place_region_x[i][j].occupancy *
02477 place_region_x[i][j].inv_capacity;
02478 }
02479 else
02480 {
02481
02482 tmp = place_region_x[i][j].occupancy *
02483 place_region_x[i][j].inv_capacity;
02484 cost += tmp * tmp;
02485 }
02486
02487 if(place_region_y[i][j].occupancy <
02488 place_region_y[i][j].capacity)
02489 {
02490 cost += place_region_y[i][j].occupancy *
02491 place_region_y[i][j].inv_capacity;
02492 }
02493 else
02494 {
02495
02496 tmp = place_region_y[i][j].occupancy *
02497 place_region_y[i][j].inv_capacity;
02498 cost += tmp * tmp;
02499 }
02500
02501 }
02502 }
02503
02504 return (cost);
02505 }
02506
02507
02508 static void
02509 update_region_occ(int inet,
02510 struct s_bb *coords,
02511 int add_or_sub,
02512 int num_regions)
02513 {
02514
02515
02516
02517
02518
02519
02520 float net_xmin, net_xmax, net_ymin, net_ymax, crossing;
02521 float inv_region_len, inv_region_height;
02522 float inv_bb_len, inv_bb_height;
02523 float overlap_xlow, overlap_xhigh, overlap_ylow, overlap_yhigh;
02524 float y_overlap, x_overlap, x_occupancy, y_occupancy;
02525 int imin, imax, jmin, jmax, i, j;
02526
02527 if(net[inet].num_sinks >= 50)
02528 {
02529 crossing = 2.7933 + 0.02616 * ((net[inet].num_sinks + 1) - 50);
02530 }
02531 else
02532 {
02533 crossing = cross_count[net[inet].num_sinks];
02534 }
02535
02536 net_xmin = coords->xmin - 0.5;
02537 net_xmax = coords->xmax + 0.5;
02538 net_ymin = coords->ymin - 0.5;
02539 net_ymax = coords->ymax + 0.5;
02540
02541
02542
02543 inv_region_len = (float)num_regions / (float)nx;
02544 inv_region_height = (float)num_regions / (float)ny;
02545
02546
02547
02548
02549
02550
02551
02552
02553 imin = (int)(net_xmin - 0.5) * inv_region_len;
02554 imax = (int)(net_xmax - 0.5) * inv_region_len;
02555 imax = min(imax, num_regions - 1);
02556
02557 jmin = (int)(net_ymin - 0.5) * inv_region_height;
02558 jmax = (int)(net_ymax - 0.5) * inv_region_height;
02559 jmax = min(jmax, num_regions - 1);
02560
02561 inv_bb_len = 1. / (net_xmax - net_xmin);
02562 inv_bb_height = 1. / (net_ymax - net_ymin);
02563
02564
02565
02566
02567 for(i = imin; i <= imax; i++)
02568 {
02569 for(j = jmin; j <= jmax; j++)
02570 {
02571 overlap_xlow = max(place_region_bounds_x[i], net_xmin);
02572 overlap_xhigh =
02573 min(place_region_bounds_x[i + 1], net_xmax);
02574 overlap_ylow = max(place_region_bounds_y[j], net_ymin);
02575 overlap_yhigh =
02576 min(place_region_bounds_y[j + 1], net_ymax);
02577
02578 x_overlap = overlap_xhigh - overlap_xlow;
02579 y_overlap = overlap_yhigh - overlap_ylow;
02580
02581 #ifdef DEBUG
02582
02583 if(x_overlap < -0.001)
02584 {
02585 printf
02586 ("Error in update_region_occ: x_overlap < 0"
02587 "\n inet = %d, overlap = %g\n", inet,
02588 x_overlap);
02589 }
02590
02591 if(y_overlap < -0.001)
02592 {
02593 printf
02594 ("Error in update_region_occ: y_overlap < 0"
02595 "\n inet = %d, overlap = %g\n", inet,
02596 y_overlap);
02597 }
02598 #endif
02599
02600
02601 x_occupancy =
02602 crossing * y_overlap * x_overlap * inv_bb_height *
02603 inv_region_len;
02604 y_occupancy =
02605 crossing * x_overlap * y_overlap * inv_bb_len *
02606 inv_region_height;
02607
02608 place_region_x[i][j].occupancy +=
02609 add_or_sub * x_occupancy;
02610 place_region_y[i][j].occupancy +=
02611 add_or_sub * y_occupancy;
02612 }
02613 }
02614
02615 }
02616
02617
02618 static void
02619 free_place_regions(int num_regions)
02620 {
02621
02622
02623
02624
02625 free_matrix(place_region_x, 0, num_regions - 1, 0, sizeof(struct
02626 s_place_region));
02627
02628 free_matrix(place_region_y, 0, num_regions - 1, 0, sizeof(struct
02629 s_place_region));
02630
02631 free(place_region_bounds_x);
02632 free(place_region_bounds_y);
02633 }
02634
02635
02636 static void
02637 free_placement_structs(int place_cost_type,
02638 int num_regions,
02639 float **old_region_occ_x,
02640 float **old_region_occ_y,
02641 struct s_placer_opts placer_opts)
02642 {
02643
02644
02645
02646
02647 int inet;
02648
02649 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
02650 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE ||
02651 placer_opts.enable_timing_computations)
02652 {
02653 for(inet = 0; inet < num_nets; inet++)
02654 {
02655
02656
02657 point_to_point_delay_cost[inet]++;
02658 free(point_to_point_delay_cost[inet]);
02659
02660 point_to_point_timing_cost[inet]++;
02661 free(point_to_point_timing_cost[inet]);
02662
02663 temp_point_to_point_delay_cost[inet]++;
02664 free(temp_point_to_point_delay_cost[inet]);
02665
02666 temp_point_to_point_timing_cost[inet]++;
02667 free(temp_point_to_point_timing_cost[inet]);
02668 }
02669 free(point_to_point_delay_cost);
02670 free(temp_point_to_point_delay_cost);
02671
02672 free(point_to_point_timing_cost);
02673 free(temp_point_to_point_timing_cost);
02674
02675 free_matrix(net_pin_index, 0, num_blocks - 1, 0, sizeof(int));
02676 }
02677
02678
02679 free(net_cost);
02680 free(temp_net_cost);
02681 free(bb_num_on_edges);
02682 free(bb_coords);
02683
02684 net_cost = NULL;
02685 temp_net_cost = NULL;
02686 bb_num_on_edges = NULL;
02687 bb_coords = NULL;
02688
02689 free_unique_pin_list();
02690
02691 if(place_cost_type == NONLINEAR_CONG)
02692 {
02693 free_place_regions(num_regions);
02694 free_matrix(old_region_occ_x, 0, num_regions - 1, 0,
02695 sizeof(float));
02696 free_matrix(old_region_occ_y, 0, num_regions - 1, 0,
02697 sizeof(float));
02698 }
02699
02700 else if(place_cost_type == LINEAR_CONG)
02701 {
02702 free_fast_cost_update_structs();
02703 }
02704 }
02705
02706
02707 static void
02708 alloc_and_load_placement_structs(int place_cost_type,
02709 int num_regions,
02710 float place_cost_exp,
02711 float ***old_region_occ_x,
02712 float ***old_region_occ_y,
02713 struct s_placer_opts placer_opts)
02714 {
02715
02716
02717
02718
02719 int inet, ipin, max_pins_per_fb, i;
02720
02721 max_pins_per_fb = 0;
02722 for(i = 0; i < num_types; i++)
02723 {
02724 max_pins_per_fb =
02725 max(max_pins_per_fb, type_descriptors[i].num_pins);
02726 }
02727
02728 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
02729 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE ||
02730 placer_opts.enable_timing_computations)
02731 {
02732
02733
02734 point_to_point_delay_cost =
02735 (float **)my_malloc(num_nets * sizeof(float *));
02736 temp_point_to_point_delay_cost =
02737 (float **)my_malloc(num_nets * sizeof(float *));
02738
02739 point_to_point_timing_cost =
02740 (float **)my_malloc(num_nets * sizeof(float *));
02741 temp_point_to_point_timing_cost =
02742 (float **)my_malloc(num_nets * sizeof(float *));
02743
02744 net_pin_index =
02745 (int **)alloc_matrix(0, num_blocks - 1, 0,
02746 max_pins_per_fb - 1, sizeof(int));
02747
02748 for(inet = 0; inet < num_nets; inet++)
02749 {
02750
02751
02752
02753 point_to_point_delay_cost[inet] =
02754 (float *)my_malloc(net[inet].num_sinks *
02755 sizeof(float));
02756 point_to_point_delay_cost[inet]--;
02757
02758 temp_point_to_point_delay_cost[inet] =
02759 (float *)my_malloc(net[inet].num_sinks *
02760 sizeof(float));
02761 temp_point_to_point_delay_cost[inet]--;
02762
02763 point_to_point_timing_cost[inet] =
02764 (float *)my_malloc(net[inet].num_sinks *
02765 sizeof(float));
02766 point_to_point_timing_cost[inet]--;
02767
02768 temp_point_to_point_timing_cost[inet] =
02769 (float *)my_malloc(net[inet].num_sinks *
02770 sizeof(float));
02771 temp_point_to_point_timing_cost[inet]--;
02772 }
02773 for(inet = 0; inet < num_nets; inet++)
02774 {
02775 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++)
02776 {
02777 point_to_point_delay_cost[inet][ipin] = 0;
02778 temp_point_to_point_delay_cost[inet][ipin] = 0;
02779 }
02780 }
02781 }
02782
02783
02784
02785
02786
02787 net_cost = (float *)my_malloc(num_nets * sizeof(float));
02788 temp_net_cost = (float *)my_malloc(num_nets * sizeof(float));
02789
02790
02791
02792
02793
02794 for(inet = 0; inet < num_nets; inet++)
02795 temp_net_cost[inet] = -1.;
02796
02797 bb_coords = (struct s_bb *)my_malloc(num_nets * sizeof(struct s_bb));
02798 bb_num_on_edges =
02799 (struct s_bb *)my_malloc(num_nets * sizeof(struct s_bb));
02800
02801
02802
02803 alloc_and_load_unique_pin_list();
02804
02805
02806
02807 if(place_cost_type == NONLINEAR_CONG)
02808 {
02809 alloc_place_regions(num_regions);
02810 load_place_regions(num_regions);
02811 *old_region_occ_x = (float **)alloc_matrix(0, num_regions - 1, 0,
02812 num_regions - 1,
02813 sizeof(float));
02814 *old_region_occ_y =
02815 (float **)alloc_matrix(0, num_regions - 1, 0, num_regions - 1,
02816 sizeof(float));
02817 }
02818 else
02819 {
02820 *old_region_occ_x = NULL;
02821 *old_region_occ_y = NULL;
02822 }
02823
02824 if(place_cost_type == LINEAR_CONG)
02825 {
02826 alloc_and_load_for_fast_cost_update(place_cost_exp);
02827 }
02828 }
02829
02830
02831 static void
02832 alloc_place_regions(int num_regions)
02833 {
02834
02835
02836
02837
02838
02839 place_region_x =
02840 (struct s_place_region **)alloc_matrix(0, num_regions - 1, 0,
02841 num_regions - 1,
02842 sizeof(struct s_place_region));
02843
02844 place_region_y =
02845 (struct s_place_region **)alloc_matrix(0, num_regions - 1, 0,
02846 num_regions - 1,
02847 sizeof(struct s_place_region));
02848
02849 place_region_bounds_x = (float *)my_malloc((num_regions + 1) *
02850 sizeof(float));
02851
02852 place_region_bounds_y = (float *)my_malloc((num_regions + 1) *
02853 sizeof(float));
02854 }
02855
02856
02857 static void
02858 load_place_regions(int num_regions)
02859 {
02860
02861
02862
02863
02864 int i, j, low_block, high_block, rnum;
02865 float low_lim, high_lim, capacity, fac, block_capacity;
02866 float len_fac, height_fac;
02867
02868
02869
02870 for(j = 0; j < num_regions; j++)
02871 {
02872 capacity = 0.;
02873 low_lim = (float)j / (float)num_regions *ny + 1.;
02874 high_lim = (float)(j + 1) / (float)num_regions *ny;
02875
02876 low_block = floor(low_lim);
02877 low_block = max(1, low_block);
02878 high_block = ceil(high_lim);
02879 high_block = min(high_block, ny);
02880
02881 block_capacity = (chan_width_x[low_block - 1] +
02882 chan_width_x[low_block]) / 2.;
02883 if(low_block == 1)
02884 block_capacity += chan_width_x[0] / 2.;
02885
02886 fac = 1. - (low_lim - low_block);
02887 capacity += fac * block_capacity;
02888
02889 for(rnum = low_block + 1; rnum < high_block; rnum++)
02890 {
02891 block_capacity =
02892 (chan_width_x[rnum - 1] + chan_width_x[rnum]) / 2.;
02893 capacity += block_capacity;
02894 }
02895
02896 block_capacity = (chan_width_x[high_block - 1] +
02897 chan_width_x[high_block]) / 2.;
02898 if(high_block == ny)
02899 block_capacity += chan_width_x[ny] / 2.;
02900
02901 fac = 1. - (high_block - high_lim);
02902 capacity += fac * block_capacity;
02903
02904 for(i = 0; i < num_regions; i++)
02905 {
02906 place_region_x[i][j].capacity = capacity;
02907 place_region_x[i][j].inv_capacity = 1. / capacity;
02908 place_region_x[i][j].occupancy = 0.;
02909 place_region_x[i][j].cost = 0.;
02910 }
02911 }
02912
02913
02914
02915 for(i = 0; i < num_regions; i++)
02916 {
02917 capacity = 0.;
02918 low_lim = (float)i / (float)num_regions *nx + 1.;
02919 high_lim = (float)(i + 1) / (float)num_regions *nx;
02920
02921 low_block = floor(low_lim);
02922 low_block = max(1, low_block);
02923 high_block = ceil(high_lim);
02924 high_block = min(high_block, nx);
02925
02926 block_capacity = (chan_width_y[low_block - 1] +
02927 chan_width_y[low_block]) / 2.;
02928 if(low_block == 1)
02929 block_capacity += chan_width_y[0] / 2.;
02930
02931 fac = 1. - (low_lim - low_block);
02932 capacity += fac * block_capacity;
02933
02934 for(rnum = low_block + 1; rnum < high_block; rnum++)
02935 {
02936 block_capacity =
02937 (chan_width_y[rnum - 1] + chan_width_y[rnum]) / 2.;
02938 capacity += block_capacity;
02939 }
02940
02941 block_capacity = (chan_width_y[high_block - 1] +
02942 chan_width_y[high_block]) / 2.;
02943 if(high_block == nx)
02944 block_capacity += chan_width_y[nx] / 2.;
02945
02946 fac = 1. - (high_block - high_lim);
02947 capacity += fac * block_capacity;
02948
02949 for(j = 0; j < num_regions; j++)
02950 {
02951 place_region_y[i][j].capacity = capacity;
02952 place_region_y[i][j].inv_capacity = 1. / capacity;
02953 place_region_y[i][j].occupancy = 0.;
02954 place_region_y[i][j].cost = 0.;
02955 }
02956 }
02957
02958
02959
02960
02961 len_fac = (float)nx / (float)num_regions;
02962 height_fac = (float)ny / (float)num_regions;
02963
02964 place_region_bounds_x[0] = 0.5;
02965 place_region_bounds_y[0] = 0.5;
02966
02967 for(i = 1; i <= num_regions; i++)
02968 {
02969 place_region_bounds_x[i] = place_region_bounds_x[i - 1] + len_fac;
02970 place_region_bounds_y[i] =
02971 place_region_bounds_y[i - 1] + height_fac;
02972 }
02973 }
02974
02975
02976 static void
02977 free_unique_pin_list(void)
02978 {
02979
02980
02981
02982 int any_dup, inet;
02983
02984 any_dup = 0;
02985
02986 for(inet = 0; inet < num_nets; inet++)
02987 {
02988 if(duplicate_pins[inet] != 0)
02989 {
02990 free(unique_pin_list[inet]);
02991 any_dup = 1;
02992 }
02993 }
02994
02995 if(any_dup != 0)
02996 free(unique_pin_list);
02997
02998 free(duplicate_pins);
02999 }
03000
03001
03002 static void
03003 alloc_and_load_unique_pin_list(void)
03004 {
03005
03006
03007
03008
03009
03010
03011
03012 int inet, ipin, bnum, num_dup, any_dups, offset;
03013 int *times_listed;
03014
03015
03016 duplicate_pins = my_calloc(num_nets, sizeof(int));
03017 times_listed = my_calloc(num_blocks, sizeof(int));
03018 any_dups = 0;
03019
03020 for(inet = 0; inet < num_nets; inet++)
03021 {
03022
03023 num_dup = 0;
03024
03025 for(ipin = 0; ipin <= net[inet].num_sinks; ipin++)
03026 {
03027 bnum = net[inet].node_block[ipin];
03028 times_listed[bnum]++;
03029 if(times_listed[bnum] > 1)
03030 num_dup++;
03031 }
03032
03033 if(num_dup > 0)
03034 {
03035 duplicate_pins[inet] = num_dup;
03036
03037 if(any_dups == 0)
03038 {
03039 unique_pin_list =
03040 (int **)my_calloc(num_nets, sizeof(int *));
03041 any_dups = 1;
03042 }
03043
03044 unique_pin_list[inet] =
03045 my_malloc((net[inet].num_sinks + 1 -
03046 num_dup) * sizeof(int));
03047
03048 offset = 0;
03049 for(ipin = 0; ipin <= net[inet].num_sinks; ipin++)
03050 {
03051 bnum = net[inet].node_block[ipin];
03052 if(times_listed[bnum] != 0)
03053 {
03054 times_listed[bnum] = 0;
03055 unique_pin_list[inet][offset] = bnum;
03056 offset++;
03057 }
03058 }
03059 }
03060
03061 else
03062 {
03063 for(ipin = 0; ipin <= net[inet].num_sinks; ipin++)
03064 {
03065 bnum = net[inet].node_block[ipin];
03066 times_listed[bnum] = 0;
03067 }
03068 }
03069 }
03070
03071 free((void *)times_listed);
03072 }
03073
03074
03075 static void
03076 get_bb_from_scratch(int inet,
03077 struct s_bb *coords,
03078 struct s_bb *num_on_edges)
03079 {
03080
03081
03082
03083
03084
03085
03086 int ipin, bnum, x, y, xmin, xmax, ymin, ymax;
03087 int xmin_edge, xmax_edge, ymin_edge, ymax_edge;
03088 int n_pins;
03089 int *plist;
03090
03091
03092
03093
03094
03095 if(duplicate_pins[inet] == 0)
03096 {
03097 plist = net[inet].node_block;
03098 n_pins = net[inet].num_sinks + 1;
03099 }
03100 else
03101 {
03102 plist = unique_pin_list[inet];
03103 n_pins = (net[inet].num_sinks + 1) - duplicate_pins[inet];
03104 }
03105
03106 x = block[plist[0]].x;
03107 y = block[plist[0]].y;
03108
03109 x = max(min(x, nx), 1);
03110 y = max(min(y, ny), 1);
03111
03112 xmin = x;
03113 ymin = y;
03114 xmax = x;
03115 ymax = y;
03116 xmin_edge = 1;
03117 ymin_edge = 1;
03118 xmax_edge = 1;
03119 ymax_edge = 1;
03120
03121 for(ipin = 1; ipin < n_pins; ipin++)
03122 {
03123
03124 bnum = plist[ipin];
03125 x = block[bnum].x;
03126 y = block[bnum].y;
03127
03128
03129
03130
03131
03132
03133
03134
03135 x = max(min(x, nx), 1);
03136 y = max(min(y, ny), 1);
03137
03138 if(x == xmin)
03139 {
03140 xmin_edge++;
03141 }
03142 if(x == xmax)
03143 {
03144 xmax_edge++;
03145 }
03146 else if(x < xmin)
03147 {
03148 xmin = x;
03149 xmin_edge = 1;
03150 }
03151 else if(x > xmax)
03152 {
03153 xmax = x;
03154 xmax_edge = 1;
03155 }
03156
03157 if(y == ymin)
03158 {
03159 ymin_edge++;
03160 }
03161 if(y == ymax)
03162 {
03163 ymax_edge++;
03164 }
03165 else if(y < ymin)
03166 {
03167 ymin = y;
03168 ymin_edge = 1;
03169 }
03170 else if(y > ymax)
03171 {
03172 ymax = y;
03173 ymax_edge = 1;
03174 }
03175 }
03176
03177
03178
03179
03180 coords->xmin = xmin;
03181 coords->xmax = xmax;
03182 coords->ymin = ymin;
03183 coords->ymax = ymax;
03184
03185 num_on_edges->xmin = xmin_edge;
03186 num_on_edges->xmax = xmax_edge;
03187 num_on_edges->ymin = ymin_edge;
03188 num_on_edges->ymax = ymax_edge;
03189 }
03190
03191
03192 static double
03193 get_net_wirelength_estimate(int inet,
03194 struct s_bb *bbptr)
03195 {
03196
03197
03198
03199
03200 double ncost, crossing;
03201
03202
03203
03204
03205 if(((net[inet].num_sinks + 1) > 50) && ((net[inet].num_sinks + 1) < 85))
03206 {
03207 crossing = 2.7933 + 0.02616 * ((net[inet].num_sinks + 1) - 50);
03208 }
03209 else if((net[inet].num_sinks + 1) >= 85)
03210 {
03211 crossing =
03212 2.7933 + 0.011 * (net[inet].num_sinks + 1) -
03213 0.0000018 * (net[inet].num_sinks + 1) * (net[inet].num_sinks +
03214 1);
03215 }
03216 else
03217 {
03218 crossing = cross_count[(net[inet].num_sinks + 1) - 1];
03219 }
03220
03221
03222
03223
03224
03225
03226
03227
03228 ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing;
03229
03230 ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing;
03231
03232 return (ncost);
03233 }
03234
03235
03236 static float
03237 get_net_cost(int inet,
03238 struct s_bb *bbptr)
03239 {
03240
03241
03242
03243
03244 float ncost, crossing;
03245
03246
03247
03248
03249 if((net[inet].num_sinks + 1) > 50)
03250 {
03251 crossing = 2.7933 + 0.02616 * ((net[inet].num_sinks + 1) - 50);
03252
03253 }
03254 else
03255 {
03256 crossing = cross_count[(net[inet].num_sinks + 1) - 1];
03257 }
03258
03259
03260
03261
03262
03263
03264
03265
03266 ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing *
03267 chanx_place_cost_fac[bbptr->ymax][bbptr->ymin - 1];
03268
03269 ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing *
03270 chany_place_cost_fac[bbptr->xmax][bbptr->xmin - 1];
03271
03272 return (ncost);
03273 }
03274
03275
03276 static void
03277 get_non_updateable_bb(int inet,
03278 struct s_bb *bb_coord_new)
03279 {
03280
03281
03282
03283
03284
03285
03286
03287
03288
03289
03290 int k, xmax, ymax, xmin, ymin, x, y;
03291
03292 x = block[net[inet].node_block[0]].x;
03293 y = block[net[inet].node_block[0]].y;
03294
03295 xmin = x;
03296 ymin = y;
03297 xmax = x;
03298 ymax = y;
03299
03300 for(k = 1; k < (net[inet].num_sinks + 1); k++)
03301 {
03302 x = block[net[inet].node_block[k]].x;
03303 y = block[net[inet].node_block[k]].y;
03304
03305 if(x < xmin)
03306 {
03307 xmin = x;
03308 }
03309 else if(x > xmax)
03310 {
03311 xmax = x;
03312 }
03313
03314 if(y < ymin)
03315 {
03316 ymin = y;
03317 }
03318 else if(y > ymax)
03319 {
03320 ymax = y;
03321 }
03322 }
03323
03324
03325
03326
03327
03328
03329
03330
03331 bb_coord_new->xmin = max(min(xmin, nx), 1);
03332 bb_coord_new->ymin = max(min(ymin, ny), 1);
03333 bb_coord_new->xmax = max(min(xmax, nx), 1);
03334 bb_coord_new->ymax = max(min(ymax, ny), 1);
03335 }
03336
03337
03338 static void
03339 update_bb(int inet,
03340 struct s_bb *bb_coord_new,
03341 struct s_bb *bb_edge_new,
03342 int xold,
03343 int yold,
03344 int xnew,
03345 int ynew)
03346 {
03347
03348
03349
03350
03351
03352
03353
03354
03355
03356
03357
03358
03359
03360
03361 xnew = max(min(xnew, nx), 1);
03362 ynew = max(min(ynew, ny), 1);
03363 xold = max(min(xold, nx), 1);
03364 yold = max(min(yold, ny), 1);
03365
03366
03367
03368 if(xnew < xold)
03369 {
03370
03371
03372
03373 if(xold == bb_coords[inet].xmax)
03374 {
03375 if(bb_num_on_edges[inet].xmax == 1)
03376 {
03377 get_bb_from_scratch(inet, bb_coord_new,
03378 bb_edge_new);
03379 return;
03380 }
03381 else
03382 {
03383 bb_edge_new->xmax =
03384 bb_num_on_edges[inet].xmax - 1;
03385 bb_coord_new->xmax = bb_coords[inet].xmax;
03386 }
03387 }
03388
03389 else
03390 {
03391 bb_coord_new->xmax = bb_coords[inet].xmax;
03392 bb_edge_new->xmax = bb_num_on_edges[inet].xmax;
03393 }
03394
03395
03396
03397 if(xnew < bb_coords[inet].xmin)
03398 {
03399 bb_coord_new->xmin = xnew;
03400 bb_edge_new->xmin = 1;
03401 }
03402
03403 else if(xnew == bb_coords[inet].xmin)
03404 {
03405 bb_coord_new->xmin = xnew;
03406 bb_edge_new->xmin = bb_num_on_edges[inet].xmin + 1;
03407 }
03408
03409 else
03410 {
03411 bb_coord_new->xmin = bb_coords[inet].xmin;
03412 bb_edge_new->xmin = bb_num_on_edges[inet].xmin;
03413 }
03414 }
03415
03416
03417 else if(xnew > xold)
03418 {
03419
03420
03421
03422 if(xold == bb_coords[inet].xmin)
03423 {
03424 if(bb_num_on_edges[inet].xmin == 1)
03425 {
03426 get_bb_from_scratch(inet, bb_coord_new,
03427 bb_edge_new);
03428 return;
03429 }
03430 else
03431 {
03432 bb_edge_new->xmin =
03433 bb_num_on_edges[inet].xmin - 1;
03434 bb_coord_new->xmin = bb_coords[inet].xmin;
03435 }
03436 }
03437
03438 else
03439 {
03440 bb_coord_new->xmin = bb_coords[inet].xmin;
03441 bb_edge_new->xmin = bb_num_on_edges[inet].xmin;
03442 }
03443
03444
03445
03446 if(xnew > bb_coords[inet].xmax)
03447 {
03448 bb_coord_new->xmax = xnew;
03449 bb_edge_new->xmax = 1;
03450 }
03451
03452 else if(xnew == bb_coords[inet].xmax)
03453 {
03454 bb_coord_new->xmax = xnew;
03455 bb_edge_new->xmax = bb_num_on_edges[inet].xmax + 1;
03456 }
03457
03458 else
03459 {
03460 bb_coord_new->xmax = bb_coords[inet].xmax;
03461 bb_edge_new->xmax = bb_num_on_edges[inet].xmax;
03462 }
03463 }
03464
03465 else
03466 {
03467 bb_coord_new->xmin = bb_coords[inet].xmin;
03468 bb_coord_new->xmax = bb_coords[inet].xmax;
03469 bb_edge_new->xmin = bb_num_on_edges[inet].xmin;
03470 bb_edge_new->xmax = bb_num_on_edges[inet].xmax;
03471 }
03472
03473
03474
03475 if(ynew < yold)
03476 {
03477
03478
03479
03480 if(yold == bb_coords[inet].ymax)
03481 {
03482 if(bb_num_on_edges[inet].ymax == 1)
03483 {
03484 get_bb_from_scratch(inet, bb_coord_new,
03485 bb_edge_new);
03486 return;
03487 }
03488 else
03489 {
03490 bb_edge_new->ymax =
03491 bb_num_on_edges[inet].ymax - 1;
03492 bb_coord_new->ymax = bb_coords[inet].ymax;
03493 }
03494 }
03495
03496 else
03497 {
03498 bb_coord_new->ymax = bb_coords[inet].ymax;
03499 bb_edge_new->ymax = bb_num_on_edges[inet].ymax;
03500 }
03501
03502
03503
03504 if(ynew < bb_coords[inet].ymin)
03505 {
03506 bb_coord_new->ymin = ynew;
03507 bb_edge_new->ymin = 1;
03508 }
03509
03510 else if(ynew == bb_coords[inet].ymin)
03511 {
03512 bb_coord_new->ymin = ynew;
03513 bb_edge_new->ymin = bb_num_on_edges[inet].ymin + 1;
03514 }
03515
03516 else
03517 {
03518 bb_coord_new->ymin = bb_coords[inet].ymin;
03519 bb_edge_new->ymin = bb_num_on_edges[inet].ymin;
03520 }
03521 }
03522
03523 else if(ynew > yold)
03524 {
03525
03526
03527
03528 if(yold == bb_coords[inet].ymin)
03529 {
03530 if(bb_num_on_edges[inet].ymin == 1)
03531 {
03532 get_bb_from_scratch(inet, bb_coord_new,
03533 bb_edge_new);
03534 return;
03535 }
03536 else
03537 {
03538 bb_edge_new->ymin =
03539 bb_num_on_edges[inet].ymin - 1;
03540 bb_coord_new->ymin = bb_coords[inet].ymin;
03541 }
03542 }
03543
03544 else
03545 {
03546 bb_coord_new->ymin = bb_coords[inet].ymin;
03547 bb_edge_new->ymin = bb_num_on_edges[inet].ymin;
03548 }
03549
03550
03551
03552 if(ynew > bb_coords[inet].ymax)
03553 {
03554 bb_coord_new->ymax = ynew;
03555 bb_edge_new->ymax = 1;
03556 }
03557
03558 else if(ynew == bb_coords[inet].ymax)
03559 {
03560 bb_coord_new->ymax = ynew;
03561 bb_edge_new->ymax = bb_num_on_edges[inet].ymax + 1;
03562 }
03563
03564 else
03565 {
03566 bb_coord_new->ymax = bb_coords[inet].ymax;
03567 bb_edge_new->ymax = bb_num_on_edges[inet].ymax;
03568 }
03569 }
03570
03571 else
03572 {
03573 bb_coord_new->ymin = bb_coords[inet].ymin;
03574 bb_coord_new->ymax = bb_coords[inet].ymax;
03575 bb_edge_new->ymin = bb_num_on_edges[inet].ymin;
03576 bb_edge_new->ymax = bb_num_on_edges[inet].ymax;
03577 }
03578 }
03579
03580
03581 static void
03582 initial_placement(enum e_pad_loc_type pad_loc_type,
03583 char *pad_loc_file)
03584 {
03585
03586
03587 struct s_pos
03588 {
03589 int x;
03590 int y;
03591 int z;
03592 }
03593 **pos;
03594 int i, j, k, iblk, choice, type_index, x, y, z;
03595 int *count, *index;
03596
03597 pos = (struct s_pos **)my_malloc(num_types * sizeof(struct s_pos *));
03598 count = (int *)my_calloc(num_types, sizeof(int));
03599 index = (int *)my_calloc(num_types, sizeof(int));
03600
03601
03602
03603 for(i = 0; i <= nx + 1; i++)
03604 {
03605 for(j = 0; j <= ny + 1; j++)
03606 {
03607 grid[i][j].usage = 0;
03608 for(k = 0; k < grid[i][j].type->capacity; k++)
03609 {
03610 grid[i][j].blocks[k] = EMPTY;
03611 if(grid[i][j].offset == 0)
03612 {
03613 count[grid[i][j].type->index]++;
03614 }
03615 }
03616 }
03617 }
03618
03619 for(i = 0; i < num_types; i++)
03620 {
03621 pos[i] =
03622 (struct s_pos *)my_malloc(count[i] * sizeof(struct s_pos));
03623 }
03624
03625 for(i = 0; i <= nx + 1; i++)
03626 {
03627 for(j = 0; j <= ny + 1; j++)
03628 {
03629 for(k = 0; k < grid[i][j].type->capacity; k++)
03630 {
03631 if(grid[i][j].offset == 0)
03632 {
03633 type_index = grid[i][j].type->index;
03634 pos[type_index][index[type_index]].x = i;
03635 pos[type_index][index[type_index]].y = j;
03636 pos[type_index][index[type_index]].z = k;
03637 index[type_index]++;
03638 }
03639 }
03640 }
03641 }
03642
03643 for(iblk = 0; iblk < num_blocks; iblk++)
03644 {
03645
03646 if(!(block[iblk].type == IO_TYPE && pad_loc_type == USER))
03647 {
03648 type_index = block[iblk].type->index;
03649 assert(count[type_index] > 0);
03650 choice = my_irand(count[type_index] - 1);
03651 x = pos[type_index][choice].x;
03652 y = pos[type_index][choice].y;
03653 z = pos[type_index][choice].z;
03654 grid[x][y].blocks[z] = iblk;
03655 grid[x][y].usage++;
03656
03657
03658 pos[type_index][choice] = pos[type_index][count[type_index] - 1];
03659 count[type_index]--;
03660 }
03661 }
03662
03663 if(pad_loc_type == USER)
03664 {
03665 read_user_pad_loc(pad_loc_file);
03666 }
03667
03668
03669
03670
03671 for(i = 0; i <= (nx + 1); i++)
03672 {
03673 for(j = 0; j <= (ny + 1); j++)
03674 {
03675 for(k = 0; k < grid[i][j].type->capacity; k++)
03676 {
03677 assert(grid[i][j].blocks != NULL);
03678 iblk = grid[i][j].blocks[k];
03679
03680 if(iblk != EMPTY)
03681 {
03682 block[iblk].x = i;
03683 block[iblk].y = j;
03684 block[iblk].z = k;
03685 }
03686 }
03687 }
03688 }
03689
03690 #ifdef VERBOSE
03691 printf("At end of initial_placement.\n");
03692 dump_clbs();
03693 #endif
03694 for(i = 0; i < num_types; i++)
03695 {
03696 free(pos[i]);
03697 }
03698 free(pos);
03699 free(index);
03700 free(count);
03701 }
03702
03703
03704 static void
03705 free_fast_cost_update_structs(void)
03706 {
03707
03708
03709
03710
03711 int i;
03712
03713 for(i = 0; i <= ny; i++)
03714 free(chanx_place_cost_fac[i]);
03715
03716 free(chanx_place_cost_fac);
03717
03718 for(i = 0; i <= nx; i++)
03719 free(chany_place_cost_fac[i]);
03720
03721 free(chany_place_cost_fac);
03722 }
03723
03724
03725 static void
03726 alloc_and_load_for_fast_cost_update(float place_cost_exp)
03727 {
03728
03729
03730
03731
03732
03733
03734
03735
03736
03737
03738
03739
03740
03741 int low, high, i;
03742
03743
03744
03745
03746
03747 chanx_place_cost_fac = (float **)my_malloc((ny + 1) * sizeof(float *));
03748 for(i = 0; i <= ny; i++)
03749 chanx_place_cost_fac[i] = (float *)my_malloc((i + 1) * sizeof(float));
03750
03751 chany_place_cost_fac = (float **)my_malloc((nx + 1) * sizeof(float *));
03752 for(i = 0; i <= nx; i++)
03753 chany_place_cost_fac[i] = (float *)my_malloc((i + 1) * sizeof(float));
03754
03755
03756
03757
03758
03759 chanx_place_cost_fac[0][0] = chan_width_x[0];
03760
03761 for(high = 1; high <= ny; high++)
03762 {
03763 chanx_place_cost_fac[high][high] = chan_width_x[high];
03764 for(low = 0; low < high; low++)
03765 {
03766 chanx_place_cost_fac[high][low] =
03767 chanx_place_cost_fac[high - 1][low] +
03768 chan_width_x[high];
03769 }
03770 }
03771
03772
03773
03774
03775
03776
03777
03778
03779
03780 for(high = 0; high <= ny; high++)
03781 for(low = 0; low <= high; low++)
03782 {
03783 chanx_place_cost_fac[high][low] = (high - low + 1.) /
03784 chanx_place_cost_fac[high][low];
03785 chanx_place_cost_fac[high][low] =
03786 pow((double)chanx_place_cost_fac[high][low],
03787 (double)place_cost_exp);
03788 }
03789
03790
03791
03792
03793
03794 chany_place_cost_fac[0][0] = chan_width_y[0];
03795
03796 for(high = 1; high <= nx; high++)
03797 {
03798 chany_place_cost_fac[high][high] = chan_width_y[high];
03799 for(low = 0; low < high; low++)
03800 {
03801 chany_place_cost_fac[high][low] =
03802 chany_place_cost_fac[high - 1][low] +
03803 chan_width_y[high];
03804 }
03805 }
03806
03807
03808
03809
03810 for(high = 0; high <= nx; high++)
03811 for(low = 0; low <= high; low++)
03812 {
03813 chany_place_cost_fac[high][low] = (high - low + 1.) /
03814 chany_place_cost_fac[high][low];
03815 chany_place_cost_fac[high][low] =
03816 pow((double)chany_place_cost_fac[high][low],
03817 (double)place_cost_exp);
03818 }
03819 }
03820
03821
03822 static void
03823 check_place(float bb_cost,
03824 float timing_cost,
03825 int place_cost_type,
03826 int num_regions,
03827 enum e_place_algorithm place_algorithm,
03828 float delay_cost)
03829 {
03830
03831
03832
03833
03834
03835
03836
03837 static int *bdone;
03838 int i, j, k, error = 0, bnum;
03839 float bb_cost_check;
03840 int usage_check;
03841 float timing_cost_check, delay_cost_check;
03842
03843 bb_cost_check = comp_bb_cost(CHECK, place_cost_type, num_regions);
03844 printf("bb_cost recomputed from scratch is %g.\n", bb_cost_check);
03845 if(fabs(bb_cost_check - bb_cost) > bb_cost * ERROR_TOL)
03846 {
03847 printf
03848 ("Error: bb_cost_check: %g and bb_cost: %g differ in check_place.\n",
03849 bb_cost_check, bb_cost);
03850 error++;
03851 }
03852
03853 if(place_algorithm == NET_TIMING_DRIVEN_PLACE ||
03854 place_algorithm == PATH_TIMING_DRIVEN_PLACE)
03855 {
03856 comp_td_costs(&timing_cost_check, &delay_cost_check);
03857 printf("timing_cost recomputed from scratch is %g. \n",
03858 timing_cost_check);
03859 if(fabs(timing_cost_check - timing_cost) >
03860 timing_cost * ERROR_TOL)
03861 {
03862 printf("Error: timing_cost_check: %g and timing_cost: "
03863 "%g differ in check_place.\n",
03864 timing_cost_check, timing_cost);
03865 error++;
03866 }
03867 printf("delay_cost recomputed from scratch is %g. \n",
03868 delay_cost_check);
03869 if(fabs(delay_cost_check - delay_cost) > delay_cost * ERROR_TOL)
03870 {
03871 printf("Error: delay_cost_check: %g and delay_cost: "
03872 "%g differ in check_place.\n",
03873 delay_cost_check, delay_cost);
03874 error++;
03875 }
03876 }
03877
03878 bdone = (int *)my_malloc(num_blocks * sizeof(int));
03879 for(i = 0; i < num_blocks; i++)
03880 bdone[i] = 0;
03881
03882
03883 for(i = 0; i <= (nx + 1); i++)
03884 for(j = 0; j <= (ny + 1); j++)
03885 {
03886 if(grid[i][j].usage > grid[i][j].type->capacity)
03887 {
03888 printf
03889 ("Error: block at grid location (%d,%d) overused. "
03890 "Usage is %d\n", i, j, grid[i][j].usage);
03891 error++;
03892 }
03893 usage_check = 0;
03894 for(k = 0; k < grid[i][j].type->capacity; k++)
03895 {
03896 bnum = grid[i][j].blocks[k];
03897 if(EMPTY == bnum)
03898 continue;
03899
03900 if(block[bnum].type != grid[i][j].type)
03901 {
03902 printf
03903 ("Error: block %d type does not match grid location (%d,%d) type.\n",
03904 bnum, i, j);
03905 error++;
03906 }
03907 if((block[bnum].x != i) || (block[bnum].y != j))
03908 {
03909 printf
03910 ("Error: block %d location conflicts with grid(%d,%d)"
03911 "data.\n", bnum, i, j);
03912 error++;
03913 }
03914 ++usage_check;
03915 bdone[bnum]++;
03916 }
03917 if(usage_check != grid[i][j].usage)
03918 {
03919 printf
03920 ("Error: Location (%d,%d) usage is %d, but has actual usage %d.\n",
03921 i, j, grid[i][j].usage, usage_check);
03922 }
03923 }
03924
03925
03926 for(i = 0; i < num_blocks; i++)
03927 if(bdone[i] != 1)
03928 {
03929 printf
03930 ("Error: block %d listed %d times in data structures.\n",
03931 i, bdone[i]);
03932 error++;
03933 }
03934 free(bdone);
03935
03936 if(error == 0)
03937 {
03938 printf
03939 ("\nCompleted placement consistency check successfully.\n\n");
03940 #ifdef PRINT_REL_POS_DISTR
03941 print_relative_pos_distr();
03942 #endif
03943 }
03944 else
03945 {
03946 printf
03947 ("\nCompleted placement consistency check, %d Errors found.\n\n",
03948 error);
03949 printf("Aborting program.\n");
03950 exit(1);
03951 }
03952 }