VPR-6.0

vpr/SRC/place/place.c

Go to the documentation of this file.
00001 /*#include <stdlib.h> */
00002 #include <stdio.h>
00003 #include <math.h>
00004 #include <assert.h>
00005 #include "util.h"
00006 #include "vpr_types.h"
00007 #include "globals.h"
00008 #include "mst.h"
00009 #include "place.h"
00010 #include "read_place.h"
00011 #include "draw.h"
00012 #include "place_and_route.h"
00013 #include "net_delay.h"
00014 #include "path_delay.h"
00015 #include "timing_place_lookup.h"
00016 #include "timing_place.h"
00017 #include "place_stats.h"
00018 #include "read_xml_arch_file.h"
00019 
00020 /************** Types and defines local to place.c ***************************/
00021 
00022 #define SMALL_NET 4             /**< Cut off for incremental bounding box updates. 
00023                                  *   4 is fastest -- I checked.                    */
00024 
00025 
00026 /** For comp_cost.  NORMAL means use the method that generates updateable  
00027  * bounding boxes for speed.  CHECK means compute all bounding boxes from 
00028  * scratch using a very simple routine to allow checks of the other       
00029  * costs.                                                                 
00030  */
00031 enum cost_methods
00032 { NORMAL, CHECK };
00033 
00034 #define FROM 0                  /**< What block connected to a net has moved? */
00035 #define TO 1
00036 #define FROM_AND_TO 2
00037 
00038 #define ERROR_TOL .01
00039 #define MAX_MOVES_BEFORE_RECOMPUTE 50000
00040 
00041 /********************** Variables local to place.c ***************************/
00042 
00043 /** Cost of a net, and a temporary cost of a net used during move assessment. */
00044 static float *net_cost = NULL, *temp_net_cost = NULL;   /**< [0..num_nets-1] */
00045 
00046 /**@{*/
00047 /** [0..num_nets-1][1..num_pins-1]. What is the value of the timing   
00048  * driven portion of the cost function. These arrays will be set to  
00049  * (criticality * delay) for each point to point connection. 
00050  */
00051 static float **point_to_point_timing_cost = NULL;
00052 static float **temp_point_to_point_timing_cost = NULL;
00053 /**@}*/
00054 
00055 /**@{*/
00056 /** [0..num_nets-1][1..num_pins-1]. What is the value of the delay 
00057  * for each connection in the circuit */
00058 static float **point_to_point_delay_cost = NULL;
00059 static float **temp_point_to_point_delay_cost = NULL;
00060 /**@}*/
00061 
00062 
00063 /** [0..num_blocks-1][0..pins_per_clb-1]. Indicates which pin on the net 
00064  * this block corresponds to, this is only required during timing-driven 
00065  * placement. It is used to allow us to update individual connections on 
00066  * each net */
00067 static int **net_pin_index = NULL;
00068 
00069 
00070 /** [0..num_nets-1].  Store the bounding box coordinates and the number of    
00071  * blocks on each of a net's bounding box (to allow efficient updates),      
00072  * respectively.                                                             
00073  */
00074 static struct s_bb *bb_coords = NULL, *bb_num_on_edges = NULL;
00075 
00076 /** Stores the maximum and expected occupancies, plus the cost, of each   
00077  * region in the placement.  Used only by the NONLINEAR_CONG cost        
00078  * function.  [0..num_region-1][0..num_region-1].  Place_region_x and    
00079  * y give the situation for the x and y directed channels, respectively. 
00080  */
00081 static struct s_place_region **place_region_x, **place_region_y;
00082 
00083 /** Used only with nonlinear congestion.  [0..num_regions].            */
00084 static float *place_region_bounds_x, *place_region_bounds_y;
00085 
00086 /** The arrays below are used to precompute the inverse of the average  
00087  * number of tracks per channel between [subhigh] and [sublow].  Access 
00088  * them as chan?_place_cost_fac[subhigh][sublow].  They are used to     
00089  * speed up the computation of the cost function that takes the length  
00090  * of the net bounding box in each dimension, divided by the average    
00091  * number of tracks in that direction; for other cost functions they    
00092  * will never be used.                                                  
00093  */
00094 static float **chanx_place_cost_fac, **chany_place_cost_fac;
00095 
00096 
00097 /** Expected crossing counts for nets with different #'s of pins.  From 
00098  * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me).   
00099  */
00100 static const float cross_count[50] = {  /**< [0..49] */
00101     1.0, 1.0, 1.0, 1.0828, 1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493,
00102     1.4974, 1.5455, 1.5937, 1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519,
00103     1.8924,
00104     1.9288, 1.9652, 2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016,
00105     2.2334,
00106     2.2646, 2.2958, 2.3271, 2.3583, 2.3895, 2.4187, 2.4479, 2.4772, 2.5064,
00107     2.5356,
00108     2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671,
00109     2.7933
00110 };
00111 
00112 
00113 /********************* Static subroutines local to place.c *******************/
00114 
00115 static void alloc_place_regions(int num_regions);
00116 
00117 static void load_place_regions(int num_regions);
00118 
00119 static void free_place_regions(int num_regions);
00120 
00121 static void alloc_and_load_placement_structs(int place_cost_type,
00122                                              int num_regions,
00123                                              float place_cost_exp,
00124                                              float ***old_region_occ_x,
00125                                              float ***old_region_occ_y,
00126                                              struct s_placer_opts
00127                                              placer_opts);
00128 
00129 static void free_placement_structs(int place_cost_type,
00130                                    int num_regions,
00131                                    float **old_region_occ_x,
00132                                    float **old_region_occ_y,
00133                                    struct s_placer_opts placer_opts);
00134 
00135 static void alloc_and_load_for_fast_cost_update(float place_cost_exp);
00136 
00137 static void initial_placement(enum e_pad_loc_type pad_loc_type,
00138                               char *pad_loc_file);
00139 
00140 static float comp_bb_cost(int method,
00141                           int place_cost_type,
00142                           int num_regions);
00143 
00144 static int try_swap(float t,
00145                     float *cost,
00146                     float *bb_cost,
00147                     float *timing_cost,
00148                     float rlim,
00149                     int place_cost_type,
00150                     float **old_region_occ_x,
00151                     float **old_region_occ_y,
00152                     int num_regions,
00153                     boolean fixed_pins,
00154                     enum e_place_algorithm place_algorithm,
00155                     float timing_tradeoff,
00156                     float inverse_prev_bb_cost,
00157                     float inverse_prev_timing_cost,
00158                     float *delay_cost,
00159                     int *x_lookup);
00160 
00161 static void check_place(float bb_cost,
00162                         float timing_cost,
00163                         int place_cost_type,
00164                         int num_regions,
00165                         enum e_place_algorithm place_algorithm,
00166                         float delay_cost);
00167 
00168 static float starting_t(float *cost_ptr,
00169                         float *bb_cost_ptr,
00170                         float *timing_cost_ptr,
00171                         int place_cost_type,
00172                         float **old_region_occ_x,
00173                         float **old_region_occ_y,
00174                         int num_regions,
00175                         boolean fixed_pins,
00176                         struct s_annealing_sched annealing_sched,
00177                         int max_moves,
00178                         float rlim,
00179                         enum e_place_algorithm place_algorithm,
00180                         float timing_tradeoff,
00181                         float inverse_prev_bb_cost,
00182                         float inverse_prev_timing_cost,
00183                         float *delay_cost_ptr);
00184 
00185 
00186 static void update_t(float *t,
00187                      float std_dev,
00188                      float rlim,
00189                      float success_rat,
00190                      struct s_annealing_sched annealing_sched);
00191 
00192 static void update_rlim(float *rlim,
00193                         float success_rat);
00194 
00195 static int exit_crit(float t,
00196                      float cost,
00197                      struct s_annealing_sched annealing_sched);
00198 
00199 static int count_connections(void);
00200 
00201 static void compute_net_pin_index_values(void);
00202 
00203 static double get_std_dev(int n,
00204                           double sum_x_squared,
00205                           double av_x);
00206 
00207 static void free_fast_cost_update_structs(void);
00208 
00209 static float recompute_bb_cost(int place_cost_type,
00210                                int num_regions);
00211 
00212 static float comp_td_point_to_point_delay(int inet,
00213                                           int ipin);
00214 
00215 static void update_td_cost(int b_from,
00216                            int b_to,
00217                            int num_of_pins);
00218 
00219 static void comp_delta_td_cost(int b_from,
00220                                int b_to,
00221                                int num_of_pins,
00222                                float *delta_timing,
00223                                float *delta_delay);
00224 
00225 static void comp_td_costs(float *timing_cost,
00226                           float *connection_delay_sum);
00227 
00228 static int assess_swap(float delta_c,
00229                        float t);
00230 
00231 static boolean find_to(int x_from,
00232                        int y_from,
00233                        t_type_ptr type,
00234                        float rlim,
00235                        int *x_lookup,
00236                        int *x_to,
00237                        int *y_to);
00238 
00239 static void get_non_updateable_bb(int inet,
00240                                   struct s_bb *bb_coord_new);
00241 
00242 static void update_bb(int inet,
00243                       struct s_bb *bb_coord_new,
00244                       struct s_bb *bb_edge_new,
00245                       int xold,
00246                       int yold,
00247                       int xnew,
00248                       int ynew);
00249 
00250 static int find_affected_nets(int *nets_to_update,
00251                               int *net_block_moved,
00252                               int b_from,
00253                               int b_to,
00254                               int num_of_pins);
00255 
00256 static float get_net_cost(int inet,
00257                           struct s_bb *bb_ptr);
00258 
00259 static float nonlinear_cong_cost(int num_regions);
00260 
00261 static void update_region_occ(int inet,
00262                               struct s_bb *coords,
00263                               int add_or_sub,
00264                               int num_regions);
00265 
00266 static void save_region_occ(float **old_region_occ_x,
00267                             float **old_region_occ_y,
00268                             int num_regions);
00269 
00270 static void restore_region_occ(float **old_region_occ_x,
00271                                float **old_region_occ_y,
00272                                int num_regions);
00273 
00274 static void get_bb_from_scratch(int inet,
00275                                 struct s_bb *coords,
00276                                 struct s_bb *num_on_edges);
00277 
00278 static double get_net_wirelength_estimate(int inet,
00279                                           struct s_bb *bbptr);
00280 
00281 /*****************************************************************************/
00282 /* RESEARCH TODO: Bounding Box and rlim need to be redone for heterogeneous to prevent a QoR penalty */
00283 /** Does almost all the work of placing a circuit.  Width_fac gives the  
00284  * width of the widest channel.  Place_cost_exp says what exponent the   
00285  * width should be taken to when calculating costs.  This allows a       
00286  * greater bias for anisotropic architectures.  Place_cost_type          
00287  * determines which cost function is used.  num_regions is used only     
00288  * the place_cost_type is NONLINEAR_CONG.                                
00289  */
00290 void
00291 try_place(struct s_placer_opts placer_opts,
00292           struct s_annealing_sched annealing_sched,
00293           t_chan_width_dist chan_width_dist,
00294           struct s_router_opts router_opts,
00295           struct s_det_routing_arch det_routing_arch,
00296           t_segment_inf * segment_inf,
00297           t_timing_inf timing_inf,
00298           t_mst_edge *** mst)
00299 {
00300     int tot_iter, inner_iter, success_sum;
00301     int move_lim, moves_since_cost_recompute, width_fac;
00302     float t, success_rat, rlim, d_max, est_crit;
00303     float cost, timing_cost, bb_cost, new_bb_cost, new_timing_cost;
00304     float delay_cost, new_delay_cost, place_delay_value;
00305     float inverse_prev_bb_cost, inverse_prev_timing_cost;
00306     float oldt;
00307     double av_cost, av_bb_cost, av_timing_cost, av_delay_cost,
00308         sum_of_squares, std_dev;
00309     float **old_region_occ_x, **old_region_occ_y;
00310     char msg[BUFSIZE];
00311     boolean fixed_pins;         /* Can pads move or not? */
00312     int num_connections;
00313     int inet, ipin, outer_crit_iter_count, inner_crit_iter_count,
00314         inner_recompute_limit;
00315     float **net_slack, **net_delay;
00316     float crit_exponent;
00317     float first_rlim, final_rlim, inverse_delta_rlim;
00318     float **remember_net_delay_original_ptr;    /*used to free net_delay if it is re-assigned */
00319 
00320     int *x_lookup;              /* Used to quickly determine valid swap columns */
00321 
00322     /* Allocated here because it goes into timing critical code where each memory allocation is expensive */
00323     x_lookup = my_malloc(nx * sizeof(int));
00324         net_delay = net_slack = NULL;
00325 
00326     remember_net_delay_original_ptr = NULL;     /*prevents compiler warning */
00327 
00328     if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00329        placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE ||
00330        placer_opts.enable_timing_computations)
00331         {
00332             /*do this before the initial placement to avoid messing up the initial placement */
00333             alloc_lookups_and_criticalities(chan_width_dist,
00334                                             router_opts,
00335                                             det_routing_arch,
00336                                             segment_inf,
00337                                             timing_inf,
00338                                             &net_delay, &net_slack);
00339 
00340             remember_net_delay_original_ptr = net_delay;
00341 
00342             /*#define PRINT_LOWER_BOUND */
00343 #ifdef PRINT_LOWER_BOUND
00344             /*print the crit_path, assuming delay between blocks that are*
00345              *block_dist apart*/
00346 
00347             if(placer_opts.block_dist <= nx)
00348                 place_delay_value =
00349                     delta_clb_to_clb[placer_opts.block_dist][0];
00350             else if(placer_opts.block_dist <= ny)
00351                 place_delay_value =
00352                     delta_clb_to_clb[0][placer_opts.block_dist];
00353             else
00354                 place_delay_value = delta_clb_to_clb[nx][ny];
00355 
00356             printf("\nLower bound assuming delay of %g\n", place_delay_value);
00357 
00358             load_constant_net_delay(net_delay, place_delay_value);
00359             load_timing_graph_net_delays(net_delay);
00360             d_max = load_net_slack(net_slack, 0);
00361 
00362 #ifdef CREATE_ECHO_FILES
00363             print_critical_path("Placement_Lower_Bound.echo");
00364             print_sink_delays("Placement_Lower_Bound_Sink_Delays.echo");
00365 #endif /* CREATE_ECHO_FILES */
00366 
00367             /*also print sink delays assuming 0 delay between blocks, 
00368              * this tells us how much logic delay is on each path */
00369 
00370             load_constant_net_delay(net_delay, 0);
00371             load_timing_graph_net_delays(net_delay);
00372             d_max = load_net_slack(net_slack, 0);
00373 
00374 #ifdef CREATE_ECHO_FILES
00375             print_sink_delays("Placement_Logic_Sink_Delays.echo");
00376 #endif /* CREATE_ECHO_FILES */
00377 #endif
00378 
00379         }
00380 
00381     width_fac = placer_opts.place_chan_width;
00382     if(placer_opts.pad_loc_type == FREE)
00383         fixed_pins = FALSE;
00384     else
00385         fixed_pins = TRUE;
00386 
00387     init_chan(width_fac, chan_width_dist);
00388 
00389     alloc_and_load_placement_structs(placer_opts.place_cost_type,
00390                                      placer_opts.num_regions,
00391                                      placer_opts.place_cost_exp,
00392                                      &old_region_occ_x, &old_region_occ_y,
00393                                      placer_opts);
00394 
00395     initial_placement(placer_opts.pad_loc_type, placer_opts.pad_loc_file);
00396     init_draw_coords((float)width_fac);
00397 
00398     /* Storing the number of pins on each type of block makes the swap routine *
00399      * slightly more efficient.                                                */
00400 
00401     /* Gets initial cost and loads bounding boxes. */
00402 
00403     if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00404        placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
00405         {
00406             bb_cost = comp_bb_cost(NORMAL, placer_opts.place_cost_type,
00407                                    placer_opts.num_regions);
00408 
00409             crit_exponent = placer_opts.td_place_exp_first;     /*this will be modified when rlim starts to change */
00410 
00411             compute_net_pin_index_values();
00412 
00413             num_connections = count_connections();
00414             printf
00415                 ("\nThere are %d point to point connections in this circuit\n\n",
00416                  num_connections);
00417 
00418             if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE)
00419                 {
00420                     for(inet = 0; inet < num_nets; inet++)
00421                         for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
00422                             timing_place_crit[inet][ipin] = 0;  /*dummy crit values */
00423 
00424                     comp_td_costs(&timing_cost, &delay_cost);   /*first pass gets delay_cost, which is used 
00425                                                                  * in criticality computations in the next call
00426                                                                  * to comp_td_costs. */
00427                     place_delay_value = delay_cost / num_connections;   /*used for computing criticalities */
00428                     load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets);
00429 
00430                 }
00431             else
00432                 place_delay_value = 0;
00433 
00434 
00435             if(placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
00436                 {
00437                     net_delay = point_to_point_delay_cost;      /*this keeps net_delay up to date with      *
00438                                                                  * *the same values that the placer is using  *
00439                                                                  * *point_to_point_delay_cost is computed each*
00440                                                                  * *time that comp_td_costs is called, and is *
00441                                                                  * *also updated after any swap is accepted   */
00442                 }
00443 
00444 
00445             load_timing_graph_net_delays(net_delay);
00446             d_max = load_net_slack(net_slack, 0);
00447             load_criticalities(placer_opts, net_slack, d_max, crit_exponent);
00448             outer_crit_iter_count = 1;
00449 
00450             /*now we can properly compute costs  */
00451             comp_td_costs(&timing_cost, &delay_cost);   /*also puts proper values into point_to_point_delay_cost */
00452 
00453             inverse_prev_timing_cost = 1 / timing_cost;
00454             inverse_prev_bb_cost = 1 / bb_cost;
00455             cost = 1;           /*our new cost function uses normalized values of           */
00456             /*bb_cost and timing_cost, the value of cost will be reset  */
00457             /*to 1 at each temperature when *_TIMING_DRIVEN_PLACE is true */
00458         }
00459     else
00460         {                       /*BOUNDING_BOX_PLACE */
00461             cost = bb_cost = comp_bb_cost(NORMAL, placer_opts.place_cost_type,
00462                                           placer_opts.num_regions);
00463             timing_cost = 0;
00464             delay_cost = 0;
00465             place_delay_value = 0;
00466             outer_crit_iter_count = 0;
00467             num_connections = 0;
00468             d_max = 0;
00469             crit_exponent = 0;
00470 
00471             inverse_prev_timing_cost = 0;       /*inverses not used */
00472             inverse_prev_bb_cost = 0;
00473         }
00474 
00475     move_lim = (int)(annealing_sched.inner_num * pow(num_blocks, 1.3333));
00476 
00477     if(placer_opts.inner_loop_recompute_divider != 0)
00478         inner_recompute_limit = (int)(0.5 + (float)move_lim /
00479                                       (float)placer_opts.
00480                                       inner_loop_recompute_divider);
00481     else                        /*don't do an inner recompute */
00482         inner_recompute_limit = move_lim + 1;
00483 
00484 
00485     /* Sometimes I want to run the router with a random placement.  Avoid *
00486      * using 0 moves to stop division by 0 and 0 length vector problems,  *
00487      * by setting move_lim to 1 (which is still too small to do any       *
00488      * significant optimization).                                         */
00489 
00490     if(move_lim <= 0)
00491         move_lim = 1;
00492 
00493     rlim = (float)max(nx, ny);
00494 
00495     first_rlim = rlim;          /*used in timing-driven placement for exponent computation */
00496     final_rlim = 1;
00497     inverse_delta_rlim = 1 / (first_rlim - final_rlim);
00498 
00499     t = starting_t(&cost, &bb_cost, &timing_cost,
00500                    placer_opts.place_cost_type,
00501                    old_region_occ_x, old_region_occ_y,
00502                    placer_opts.num_regions, fixed_pins, annealing_sched,
00503                    move_lim, rlim, placer_opts.place_algorithm,
00504                    placer_opts.timing_tradeoff, inverse_prev_bb_cost,
00505                    inverse_prev_timing_cost, &delay_cost);
00506     tot_iter = 0;
00507     moves_since_cost_recompute = 0;
00508     printf
00509         ("Initial Placement Cost: %g bb_cost: %g td_cost: %g delay_cost: %g\n\n",
00510          cost, bb_cost, timing_cost, delay_cost);
00511 
00512 #ifndef SPEC
00513     printf
00514         ("%11s  %10s %11s  %11s  %11s %11s  %11s %9s %8s  %7s  %7s  %10s  %7s\n",
00515          "T", "Cost", "Av. BB Cost", "Av. TD Cost", "Av Tot Del",
00516          "P to P Del", "d_max", "Ac Rate", "Std Dev", "R limit", "Exp",
00517          "Tot. Moves", "Alpha");
00518     printf
00519         ("%11s  %10s %11s  %11s  %11s %11s  %11s %9s %8s  %7s  %7s  %10s  %7s\n",
00520          "--------", "----------", "-----------", "-----------",
00521          "---------", "----------", "-----", "-------", "-------",
00522          "-------", "-------", "----------", "-----");
00523 #endif
00524 
00525     sprintf(msg,
00526             "Initial Placement.  Cost: %g  BB Cost: %g  TD Cost %g  Delay Cost: %g "
00527             "\t d_max %g Channel Factor: %d", cost, bb_cost, timing_cost,
00528             delay_cost, d_max, width_fac);
00529     update_screen(MAJOR, msg, PLACEMENT, FALSE);
00530 
00531     while(exit_crit(t, cost, annealing_sched) == 0)
00532         {
00533 
00534             if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00535                placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
00536                 {
00537                     cost = 1;
00538                 }
00539 
00540             av_cost = 0.;
00541             av_bb_cost = 0.;
00542             av_delay_cost = 0.;
00543             av_timing_cost = 0.;
00544             sum_of_squares = 0.;
00545             success_sum = 0;
00546 
00547             if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00548                placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
00549                 {
00550 
00551                     if(outer_crit_iter_count >=
00552                        placer_opts.recompute_crit_iter
00553                        || placer_opts.inner_loop_recompute_divider != 0)
00554                         {
00555 #ifdef VERBOSE
00556                             printf("Outer Loop Recompute Criticalities\n");
00557 #endif
00558                             place_delay_value = delay_cost / num_connections;
00559 
00560                             if(placer_opts.place_algorithm ==
00561                                NET_TIMING_DRIVEN_PLACE)
00562                                 load_constant_net_delay(net_delay,
00563                                                         place_delay_value, clb_net, num_nets);
00564                             /*note, for path_based, the net delay is not updated since it is current,
00565                              *because it accesses point_to_point_delay array */
00566 
00567                             load_timing_graph_net_delays(net_delay);
00568                             d_max = load_net_slack(net_slack, 0);
00569                             load_criticalities(placer_opts, net_slack, d_max,
00570                                                crit_exponent);
00571                             /*recompute costs from scratch, based on new criticalities */
00572                             comp_td_costs(&timing_cost, &delay_cost);
00573                             outer_crit_iter_count = 0;
00574                         }
00575                     outer_crit_iter_count++;
00576 
00577                     /*at each temperature change we update these values to be used     */
00578                     /*for normalizing the tradeoff between timing and wirelength (bb)  */
00579                     inverse_prev_bb_cost = 1 / bb_cost;
00580                     inverse_prev_timing_cost = 1 / timing_cost;
00581                 }
00582 
00583             inner_crit_iter_count = 1;
00584 
00585             for(inner_iter = 0; inner_iter < move_lim; inner_iter++)
00586                 {
00587                     if(try_swap(t, &cost, &bb_cost, &timing_cost,
00588                                 rlim, placer_opts.place_cost_type,
00589                                 old_region_occ_x, old_region_occ_y,
00590                                 placer_opts.num_regions, fixed_pins,
00591                                 placer_opts.place_algorithm,
00592                                 placer_opts.timing_tradeoff,
00593                                 inverse_prev_bb_cost,
00594                                 inverse_prev_timing_cost, &delay_cost,
00595                                 x_lookup) == 1)
00596                         {
00597                             success_sum++;
00598                             av_cost += cost;
00599                             av_bb_cost += bb_cost;
00600                             av_timing_cost += timing_cost;
00601                             av_delay_cost += delay_cost;
00602                             sum_of_squares += cost * cost;
00603                         }
00604 
00605                     if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
00606                        || placer_opts.place_algorithm ==
00607                        PATH_TIMING_DRIVEN_PLACE)
00608                         {
00609 
00610                             if(inner_crit_iter_count >= inner_recompute_limit
00611                                && inner_iter != move_lim - 1)
00612                                 {       /*on last iteration don't recompute */
00613 
00614                                     inner_crit_iter_count = 0;
00615 #ifdef VERBOSE
00616                                     printf
00617                                         ("Inner Loop Recompute Criticalities\n");
00618 #endif
00619                                     if(placer_opts.place_algorithm ==
00620                                        NET_TIMING_DRIVEN_PLACE)
00621                                         {
00622                                             place_delay_value =
00623                                                 delay_cost / num_connections;
00624                                             load_constant_net_delay(net_delay,
00625                                                                     place_delay_value, clb_net, num_nets);
00626                                         }
00627 
00628                                     load_timing_graph_net_delays(net_delay);
00629                                     d_max = load_net_slack(net_slack, 0);
00630                                     load_criticalities(placer_opts, net_slack,
00631                                                        d_max, crit_exponent);
00632                                     comp_td_costs(&timing_cost, &delay_cost);
00633                                 }
00634                             inner_crit_iter_count++;
00635                         }
00636 #ifdef VERBOSE
00637                     printf
00638                         ("t = %g  cost = %g   bb_cost = %g timing_cost = %g move = %d dmax = %g\n",
00639                          t, cost, bb_cost, timing_cost, inner_iter, d_max);
00640                     if(fabs
00641                        (bb_cost -
00642                         comp_bb_cost(CHECK, placer_opts.place_cost_type,
00643                                      placer_opts.num_regions)) >
00644                        bb_cost * ERROR_TOL)
00645                         exit(1);
00646 #endif
00647                 }
00648 
00649             /* Lines below prevent too much round-off error from accumulating *
00650              * in the cost over many iterations.  This round-off can lead to  *
00651              * error checks failing because the cost is different from what   *
00652              * you get when you recompute from scratch.                       */
00653 
00654             moves_since_cost_recompute += move_lim;
00655             if(moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE)
00656                 {
00657                     new_bb_cost =
00658                         recompute_bb_cost(placer_opts.place_cost_type,
00659                                           placer_opts.num_regions);
00660                     if(fabs(new_bb_cost - bb_cost) > bb_cost * ERROR_TOL)
00661                         {
00662                             printf
00663                                 ("Error in try_place:  new_bb_cost = %g, old bb_cost = %g.\n",
00664                                  new_bb_cost, bb_cost);
00665                             exit(1);
00666                         }
00667                     bb_cost = new_bb_cost;
00668 
00669                     if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
00670                        || placer_opts.place_algorithm ==
00671                        PATH_TIMING_DRIVEN_PLACE)
00672                         {
00673                             comp_td_costs(&new_timing_cost, &new_delay_cost);
00674                             if(fabs(new_timing_cost - timing_cost) >
00675                                timing_cost * ERROR_TOL)
00676                                 {
00677                                     printf
00678                                         ("Error in try_place:  new_timing_cost = %g, old timing_cost = %g.\n",
00679                                          new_timing_cost, timing_cost);
00680                                     exit(1);
00681                                 }
00682                             if(fabs(new_delay_cost - delay_cost) >
00683                                delay_cost * ERROR_TOL)
00684                                 {
00685                                     printf
00686                                         ("Error in try_place:  new_delay_cost = %g, old delay_cost = %g.\n",
00687                                          new_delay_cost, delay_cost);
00688                                     exit(1);
00689                                 }
00690                             timing_cost = new_timing_cost;
00691                         }
00692 
00693                     if(placer_opts.place_algorithm == BOUNDING_BOX_PLACE)
00694                         {
00695                             cost = new_bb_cost;
00696                         }
00697                     moves_since_cost_recompute = 0;
00698                 }
00699 
00700             tot_iter += move_lim;
00701             success_rat = ((float)success_sum) / move_lim;
00702             if(success_sum == 0)
00703                 {
00704                     av_cost = cost;
00705                     av_bb_cost = bb_cost;
00706                     av_timing_cost = timing_cost;
00707                     av_delay_cost = delay_cost;
00708                 }
00709             else
00710                 {
00711                     av_cost /= success_sum;
00712                     av_bb_cost /= success_sum;
00713                     av_timing_cost /= success_sum;
00714                     av_delay_cost /= success_sum;
00715                 }
00716             std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);
00717 
00718 #ifndef SPEC
00719             printf
00720                 ("%11.5g  %10.6g %11.6g  %11.6g  %11.6g %11.6g %11.4g %9.4g %8.3g  %7.4g  %7.4g  %10d  ",
00721                  t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost,
00722                  place_delay_value, d_max, success_rat, std_dev, rlim,
00723                  crit_exponent, tot_iter);
00724 #endif
00725 
00726             oldt = t;           /* for finding and printing alpha. */
00727             update_t(&t, std_dev, rlim, success_rat, annealing_sched);
00728 
00729 #ifndef SPEC
00730             printf("%7.4g\n", t / oldt);
00731                 fflush(stdout);
00732 #endif
00733 
00734             sprintf(msg,
00735                     "Cost: %g  BB Cost %g  TD Cost %g  Temperature: %g  d_max: %g",
00736                     cost, bb_cost, timing_cost, t, d_max);
00737             update_screen(MINOR, msg, PLACEMENT, FALSE);
00738             update_rlim(&rlim, success_rat);
00739 
00740             if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00741                placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
00742                 {
00743                     crit_exponent =
00744                         (1 -
00745                          (rlim -
00746                           final_rlim) * inverse_delta_rlim) *
00747                         (placer_opts.td_place_exp_last -
00748                          placer_opts.td_place_exp_first) +
00749                         placer_opts.td_place_exp_first;
00750                 }
00751 #ifdef VERBOSE
00752             dump_clbs();
00753 #endif
00754         }
00755 
00756     t = 0;                      /* freeze out */
00757     av_cost = 0.;
00758     av_bb_cost = 0.;
00759     av_timing_cost = 0.;
00760     sum_of_squares = 0.;
00761     av_delay_cost = 0.;
00762     success_sum = 0;
00763 
00764     if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00765        placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
00766         {
00767             /*at each temperature change we update these values to be used     */
00768             /*for normalizing the tradeoff between timing and wirelength (bb)  */
00769             if(outer_crit_iter_count >= placer_opts.recompute_crit_iter ||
00770                placer_opts.inner_loop_recompute_divider != 0)
00771                 {
00772 
00773 #ifdef VERBOSE
00774                     printf("Outer Loop Recompute Criticalities\n");
00775 #endif
00776                     place_delay_value = delay_cost / num_connections;
00777 
00778                     if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE)
00779                         load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets);
00780 
00781                     load_timing_graph_net_delays(net_delay);
00782                     d_max = load_net_slack(net_slack, 0);
00783                     load_criticalities(placer_opts, net_slack, d_max,
00784                                        crit_exponent);
00785                     /*recompute criticaliies */
00786                     comp_td_costs(&timing_cost, &delay_cost);
00787                     outer_crit_iter_count = 0;
00788                 }
00789             outer_crit_iter_count++;
00790 
00791             inverse_prev_bb_cost = 1 / (bb_cost);
00792             inverse_prev_timing_cost = 1 / (timing_cost);
00793         }
00794 
00795     inner_crit_iter_count = 1;
00796 
00797     for(inner_iter = 0; inner_iter < move_lim; inner_iter++)
00798         {
00799             if(try_swap(t, &cost, &bb_cost, &timing_cost,
00800                         rlim, placer_opts.place_cost_type,
00801                         old_region_occ_x, old_region_occ_y,
00802                         placer_opts.num_regions, fixed_pins,
00803                         placer_opts.place_algorithm,
00804                         placer_opts.timing_tradeoff, inverse_prev_bb_cost,
00805                         inverse_prev_timing_cost, &delay_cost, x_lookup) == 1)
00806                 {
00807                     success_sum++;
00808                     av_cost += cost;
00809                     av_bb_cost += bb_cost;
00810                     av_delay_cost += delay_cost;
00811                     av_timing_cost += timing_cost;
00812                     sum_of_squares += cost * cost;
00813 
00814                     if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
00815                        || placer_opts.place_algorithm ==
00816                        PATH_TIMING_DRIVEN_PLACE)
00817                         {
00818 
00819                             if(inner_crit_iter_count >= inner_recompute_limit
00820                                && inner_iter != move_lim - 1)
00821                                 {
00822 
00823                                     inner_crit_iter_count = 0;
00824 #ifdef VERBOSE
00825                                     printf
00826                                         ("Inner Loop Recompute Criticalities\n");
00827 #endif
00828                                     if(placer_opts.place_algorithm ==
00829                                        NET_TIMING_DRIVEN_PLACE)
00830                                         {
00831                                             place_delay_value =
00832                                                 delay_cost / num_connections;
00833                                             load_constant_net_delay(net_delay,
00834                                                                     place_delay_value, clb_net, num_nets);
00835                                         }
00836 
00837                                     load_timing_graph_net_delays(net_delay);
00838                                     d_max = load_net_slack(net_slack, 0);
00839                                     load_criticalities(placer_opts, net_slack,
00840                                                        d_max, crit_exponent);
00841                                     comp_td_costs(&timing_cost, &delay_cost);
00842                                 }
00843                             inner_crit_iter_count++;
00844                         }
00845                 }
00846 #ifdef VERBOSE
00847             printf("t = %g  cost = %g   move = %d\n", t, cost, tot_iter);
00848 #endif
00849         }
00850     tot_iter += move_lim;
00851     success_rat = ((float)success_sum) / move_lim;
00852     if(success_sum == 0)
00853         {
00854             av_cost = cost;
00855             av_bb_cost = bb_cost;
00856             av_delay_cost = delay_cost;
00857             av_timing_cost = timing_cost;
00858         }
00859     else
00860         {
00861             av_cost /= success_sum;
00862             av_bb_cost /= success_sum;
00863             av_delay_cost /= success_sum;
00864             av_timing_cost /= success_sum;
00865         }
00866 
00867     std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);
00868 
00869 
00870 #ifndef SPEC
00871     printf
00872         ("%11.5g  %10.6g %11.6g  %11.6g  %11.6g %11.6g %11.4g %9.4g %8.3g  %7.4g  %7.4g  %10d  \n\n",
00873          t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost,
00874          place_delay_value, d_max, success_rat, std_dev, rlim,
00875          crit_exponent, tot_iter);
00876 
00877 #endif
00878 
00879 #ifdef VERBOSE
00880     dump_clbs();
00881 #endif
00882 
00883     check_place(bb_cost, timing_cost, placer_opts.place_cost_type,
00884                 placer_opts.num_regions, placer_opts.place_algorithm,
00885                 delay_cost);
00886 
00887     if(placer_opts.enable_timing_computations &&
00888        placer_opts.place_algorithm == BOUNDING_BOX_PLACE)
00889         {
00890             /*need this done since the timing data has not been kept up to date*
00891              *in bounding_box mode */
00892             for(inet = 0; inet < num_nets; inet++)
00893                 for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
00894                     timing_place_crit[inet][ipin] = 0;  /*dummy crit values */
00895             comp_td_costs(&timing_cost, &delay_cost);   /*computes point_to_point_delay_cost */
00896         }
00897 
00898     if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00899        placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE ||
00900        placer_opts.enable_timing_computations)
00901         {
00902             net_delay = point_to_point_delay_cost;      /*this makes net_delay up to date with    *
00903                                                          *the same values that the placer is using*/
00904             load_timing_graph_net_delays(net_delay);
00905             est_crit = load_net_slack(net_slack, 0);
00906 #ifdef CREATE_ECHO_FILES
00907 /*              print_sink_delays("placement_sink_delays.echo"); */
00908             print_net_slack("placement_net_slacks.echo", net_slack);
00909             print_critical_path("placement_crit_path.echo");
00910 #endif /* CREATE_ECHO_FILES */
00911             printf("Placement Estimated Crit Path Delay: %g\n\n", est_crit);
00912         }
00913 
00914 
00915     sprintf(msg,
00916             "Placement. Cost: %g  bb_cost: %g td_cost: %g Channel Factor: %d d_max: %g",
00917             cost, bb_cost, timing_cost, width_fac, d_max);
00918     printf
00919         ("Placement. Cost: %g  bb_cost: %g  td_cost: %g  delay_cost: %g.\n",
00920          cost, bb_cost, timing_cost, delay_cost);
00921     update_screen(MAJOR, msg, PLACEMENT, FALSE);
00922 
00923 #ifdef SPEC
00924     printf("Total moves attempted: %d.0\n", tot_iter);
00925 #endif
00926 
00927     if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
00928        placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE ||
00929        placer_opts.enable_timing_computations)
00930         {
00931 
00932             net_delay = remember_net_delay_original_ptr;
00933 
00934             free_placement_structs(placer_opts.place_cost_type,
00935                                    placer_opts.num_regions, old_region_occ_x,
00936                                    old_region_occ_y, placer_opts);
00937             free_lookups_and_criticalities(&net_delay, &net_slack);
00938         }
00939 
00940     /* placement is done - find mst of all nets.
00941      * creating mst for each net; this gives me an ordering of sinks 
00942      * by which I will direct search (A*) for. */
00943     if(*mst)
00944         {
00945             for(inet = 0; inet < num_nets; inet++)
00946                 {
00947                     assert((*mst)[inet]);
00948                     free((*mst)[inet]);
00949                 }
00950             free(*mst);
00951         }
00952     *mst = (t_mst_edge **) my_malloc(sizeof(t_mst_edge *) * num_nets);
00953     for(inet = 0; inet < num_nets; inet++)
00954         {
00955             (*mst)[inet] = get_mst_of_net(inet);
00956         }
00957     free(x_lookup);
00958 }
00959 
00960 /** only count non-global connections */
00961 static int
00962 count_connections()
00963 {
00964 
00965     int count, inet;
00966 
00967     count = 0;
00968 
00969     for(inet = 0; inet < num_nets; inet++)
00970         {
00971 
00972             if(clb_net[inet].is_global)
00973                 continue;
00974 
00975             count += clb_net[inet].num_sinks;
00976         }
00977     return (count);
00978 }
00979 
00980 /** computes net_pin_index array, this array allows us to quickly
00981  * find what pin on the net a block pin corresponds to */
00982 static void
00983 compute_net_pin_index_values()
00984 {
00985     int inet, netpin, blk, iblk, ipin;
00986     t_type_ptr type;
00987 
00988     /*initialize values to OPEN */
00989     for(iblk = 0; iblk < num_blocks; iblk++)
00990         {
00991             type = block[iblk].type;
00992             for(ipin = 0; ipin < type->num_pins; ipin++)
00993                 {
00994                     net_pin_index[iblk][ipin] = OPEN;
00995                 }
00996         }
00997 
00998     for(inet = 0; inet < num_nets; inet++)
00999         {
01000 
01001             if(clb_net[inet].is_global)
01002                 continue;
01003 
01004             for(netpin = 0; netpin <= clb_net[inet].num_sinks; netpin++)
01005                 {
01006                     blk = clb_net[inet].node_block[netpin];
01007                     net_pin_index[blk][clb_net[inet].node_block_pin[netpin]] =
01008                         netpin;
01009                 }
01010         }
01011 }
01012 
01013 
01014 /** Returns the standard deviation of data set x.  There are n sample points, 
01015  * sum_x_squared is the summation over n of x^2 and av_x is the average x.   
01016  * All operations are done in double precision, since round off error can be 
01017  * a problem in the initial temp. std_dev calculation for big circuits.      
01018  */
01019 static double
01020 get_std_dev(int n,
01021             double sum_x_squared,
01022             double av_x)
01023 {
01024 
01025     double std_dev;
01026 
01027     if(n <= 1)
01028         std_dev = 0.;
01029     else
01030         std_dev = (sum_x_squared - n * av_x * av_x) / (double)(n - 1);
01031 
01032     if(std_dev > 0.)            /* Very small variances sometimes round negative */
01033         std_dev = sqrt(std_dev);
01034     else
01035         std_dev = 0.;
01036 
01037     return (std_dev);
01038 }
01039 
01040 
01041 /** Update the range limited to keep acceptance prob. near 0.44.  Use 
01042  * a floating point rlim to allow gradual transitions at low temps.  
01043  */
01044 static void
01045 update_rlim(float *rlim,
01046             float success_rat)
01047 {
01048     float upper_lim;
01049 
01050     *rlim = (*rlim) * (1. - 0.44 + success_rat);
01051     upper_lim = max(nx, ny);
01052     *rlim = min(*rlim, upper_lim);
01053     *rlim = max(*rlim, 1.);
01054 }
01055 
01056 
01057 /** Update the temperature according to the annealing schedule selected. */
01058 static void
01059 update_t(float *t,
01060          float std_dev,
01061          float rlim,
01062          float success_rat,
01063          struct s_annealing_sched annealing_sched)
01064 {
01065 
01066     /*  float fac; */
01067 
01068     if(annealing_sched.type == USER_SCHED)
01069         {
01070             *t = annealing_sched.alpha_t * (*t);
01071         }
01072 
01073     /* Old standard deviation based stuff is below.  This bogs down horribly 
01074      * for big circuits (alu4 and especially bigkey_mod). */
01075     /* #define LAMBDA .7  */
01076     /* ------------------------------------ */
01077 #if 0
01078     else if(std_dev == 0.)
01079         {
01080             *t = 0.;
01081         }
01082     else
01083         {
01084             fac = exp(-LAMBDA * (*t) / std_dev);
01085             fac = max(0.5, fac);
01086             *t = (*t) * fac;
01087         }
01088 #endif
01089     /* ------------------------------------- */
01090 
01091     else
01092     {                           /* AUTO_SCHED */
01093         if(success_rat > 0.96)
01094             {
01095                 *t = (*t) * 0.5;
01096             }
01097         else if(success_rat > 0.8)
01098             {
01099                 *t = (*t) * 0.9;
01100             }
01101         else if(success_rat > 0.15 || rlim > 1.)
01102             {
01103                 *t = (*t) * 0.95;
01104             }
01105         else
01106             {
01107                 *t = (*t) * 0.8;
01108             }
01109     }
01110 }
01111 
01112 
01113 /** Return 1 when the exit criterion is met.                        */
01114 static int
01115 exit_crit(float t,
01116           float cost,
01117           struct s_annealing_sched annealing_sched)
01118 {
01119     if(annealing_sched.type == USER_SCHED)
01120         {
01121             if(t < annealing_sched.exit_t)
01122                 {
01123                     return (1);
01124                 }
01125             else
01126                 {
01127                     return (0);
01128                 }
01129         }
01130 
01131     /* Automatic annealing schedule */
01132 
01133     if(t < 0.005 * cost / num_nets)
01134         {
01135             return (1);
01136         }
01137     else
01138         {
01139             return (0);
01140         }
01141 }
01142 
01143 
01144 /** Finds the starting temperature (hot condition).              */
01145 static float
01146 starting_t(float *cost_ptr,
01147            float *bb_cost_ptr,
01148            float *timing_cost_ptr,
01149            int place_cost_type,
01150            float **old_region_occ_x,
01151            float **old_region_occ_y,
01152            int num_regions,
01153            boolean fixed_pins,
01154            struct s_annealing_sched annealing_sched,
01155            int max_moves,
01156            float rlim,
01157            enum e_place_algorithm place_algorithm,
01158            float timing_tradeoff,
01159            float inverse_prev_bb_cost,
01160            float inverse_prev_timing_cost,
01161            float *delay_cost_ptr)
01162 {
01163     int i, num_accepted, move_lim;
01164     double std_dev, av, sum_of_squares; /* Double important to avoid round off */
01165     int *x_lookup;
01166 
01167     x_lookup = (int *)my_malloc(nx * sizeof(int));
01168 
01169     if(annealing_sched.type == USER_SCHED)
01170         return (annealing_sched.init_t);
01171 
01172     move_lim = min(max_moves, num_blocks);
01173 
01174     num_accepted = 0;
01175     av = 0.;
01176     sum_of_squares = 0.;
01177 
01178     /* Try one move per block.  Set t high so essentially all accepted. */
01179 
01180     for(i = 0; i < move_lim; i++)
01181         {
01182             if(try_swap(1.e30, cost_ptr, bb_cost_ptr, timing_cost_ptr, rlim,
01183                         place_cost_type,
01184                         old_region_occ_x, old_region_occ_y, num_regions,
01185                         fixed_pins, place_algorithm, timing_tradeoff,
01186                         inverse_prev_bb_cost, inverse_prev_timing_cost,
01187                         delay_cost_ptr, x_lookup) == 1)
01188                 {
01189                     num_accepted++;
01190                     av += *cost_ptr;
01191                     sum_of_squares += *cost_ptr * (*cost_ptr);
01192                 }
01193         }
01194 
01195     if(num_accepted != 0)
01196         av /= num_accepted;
01197     else
01198         av = 0.;
01199 
01200     std_dev = get_std_dev(num_accepted, sum_of_squares, av);
01201 
01202 #ifdef DEBUG
01203     if(num_accepted != move_lim)
01204         {
01205             printf
01206                 ("Warning:  Starting t: %d of %d configurations accepted.\n",
01207                  num_accepted, move_lim);
01208         }
01209 #endif
01210 
01211 #ifdef VERBOSE
01212     printf("std_dev: %g, average cost: %g, starting temp: %g\n",
01213            std_dev, av, 20. * std_dev);
01214 #endif
01215 
01216     free(x_lookup);
01217 
01218     /* Set the initial temperature to 20 times the standard of deviation */
01219     /* so that the initial temperature adjusts according to the circuit */
01220     return (20. * std_dev);
01221 }
01222 
01223 /** Picks some block and moves it to another spot.  If this spot is   
01224  * occupied, switch the blocks.  Assess the change in cost function  
01225  * and accept or reject the move.  If rejected, return 0.  If        
01226  * accepted return 1.  Pass back the new value of the cost function. 
01227  * rlim is the range limiter.                                                                            
01228  */
01229 static int
01230 try_swap(float t,
01231          float *cost,
01232          float *bb_cost,
01233          float *timing_cost,
01234          float rlim,
01235          int place_cost_type,
01236          float **old_region_occ_x,
01237          float **old_region_occ_y,
01238          int num_regions,
01239          boolean fixed_pins,
01240          enum e_place_algorithm place_algorithm,
01241          float timing_tradeoff,
01242          float inverse_prev_bb_cost,
01243          float inverse_prev_timing_cost,
01244          float *delay_cost,
01245          int *x_lookup)
01246 {
01247 
01248     int b_from, x_to, y_to, z_to, x_from, y_from, z_from, b_to;
01249     int i, k, inet, keep_switch, num_of_pins, max_pins_per_clb;
01250     int num_nets_affected, bb_index;
01251     float delta_c, bb_delta_c, timing_delta_c, delay_delta_c, newcost;
01252     static struct s_bb *bb_coord_new = NULL;
01253     static struct s_bb *bb_edge_new = NULL;
01254     static int *nets_to_update = NULL, *net_block_moved = NULL;
01255 
01256     max_pins_per_clb = 0;
01257     for(i = 0; i < num_types; i++)
01258         {
01259             max_pins_per_clb =
01260                 max(max_pins_per_clb, type_descriptors[i].num_pins);
01261         }
01262 
01263     /* Allocate the local bb_coordinate storage, etc. only once. */
01264 
01265     if(bb_coord_new == NULL)
01266         {
01267             bb_coord_new = (struct s_bb *)my_malloc(2 * max_pins_per_clb *
01268                                                     sizeof(struct s_bb));
01269             bb_edge_new = (struct s_bb *)my_malloc(2 * max_pins_per_clb *
01270                                                    sizeof(struct s_bb));
01271             nets_to_update =
01272                 (int *)my_malloc(2 * max_pins_per_clb * sizeof(int));
01273             net_block_moved =
01274                 (int *)my_malloc(2 * max_pins_per_clb * sizeof(int));
01275         }
01276 
01277 
01278     delay_delta_c = 0.0;
01279     b_from = my_irand(num_blocks - 1);
01280 
01281     /* If the pins are fixed we never move them from their initial    *
01282      * random locations.  The code below could be made more efficient *
01283      * by using the fact that pins appear first in the block list,    *
01284      * but this shouldn't cause any significant slowdown and won't be *
01285      * broken if I ever change the parser so that the pins aren't     *
01286      * necessarily at the start of the block list.                    */
01287 
01288     if(fixed_pins == TRUE)
01289         {
01290             while(block[b_from].type == IO_TYPE)
01291                 {
01292                     b_from = my_irand(num_blocks - 1);
01293                 }
01294         }
01295 
01296     x_from = block[b_from].x;
01297     y_from = block[b_from].y;
01298     z_from = block[b_from].z;
01299 
01300     if(!find_to
01301        (x_from, y_from, block[b_from].type, rlim, x_lookup, &x_to, &y_to))
01302         return FALSE;
01303 
01304     /* Make the switch in order to make computing the new bounding *
01305      * box simpler.  If the cost increase is too high, switch them *
01306      * back.  (block data structures switched, clbs not switched   *
01307      * until success of move is determined.)                       */
01308 
01309     z_to = 0;
01310     if(grid[x_to][y_to].type->capacity > 1)
01311         {
01312             z_to = my_irand(grid[x_to][y_to].type->capacity - 1);
01313         }
01314     if(grid[x_to][y_to].blocks[z_to] == EMPTY)
01315         {                       /* Moving to an empty location */
01316             b_to = EMPTY;
01317             block[b_from].x = x_to;
01318             block[b_from].y = y_to;
01319             block[b_from].z = z_to;
01320         }
01321     else
01322         {                       /* Swapping two blocks */
01323             b_to = grid[x_to][y_to].blocks[z_to];
01324             block[b_to].x = x_from;
01325             block[b_to].y = y_from;
01326             block[b_to].z = z_from;
01327 
01328             block[b_from].x = x_to;
01329             block[b_from].y = y_to;
01330             block[b_from].z = z_to;
01331         }
01332 
01333     /* Now update the cost function.  May have to do major optimizations *
01334      * here later.                                                       */
01335 
01336     /* I'm using negative values of temp_net_cost as a flag, so DO NOT   *
01337      * use cost functions that can go negative.                          */
01338 
01339     delta_c = 0;                /* Change in cost due to this swap. */
01340     bb_delta_c = 0;
01341     timing_delta_c = 0;
01342 
01343     num_of_pins = block[b_from].type->num_pins;
01344 
01345     num_nets_affected = find_affected_nets(nets_to_update, net_block_moved,
01346                                            b_from, b_to, num_of_pins);
01347 
01348     if(place_cost_type == NONLINEAR_CONG)
01349         {
01350             save_region_occ(old_region_occ_x, old_region_occ_y, num_regions);
01351         }
01352 
01353     bb_index = 0;               /* Index of new bounding box. */
01354 
01355     for(k = 0; k < num_nets_affected; k++)
01356         {
01357             inet = nets_to_update[k];
01358 
01359             /* If we swapped two blocks connected to the same net, its bounding box *
01360              * doesn't change.                                                      */
01361 
01362             if(net_block_moved[k] == FROM_AND_TO)
01363                 continue;
01364 
01365             if(clb_net[inet].num_sinks < SMALL_NET)
01366                 {
01367                     get_non_updateable_bb(inet, &bb_coord_new[bb_index]);
01368                 }
01369             else
01370                 {
01371                         /* TODO: Problem with making net update incremental, turn off for now */
01372                         get_non_updateable_bb(inet, &bb_coord_new[bb_index]);
01373 #if 0
01374                         /* TODO: Problem with making net update incremental, turn off for now */
01375                     if(net_block_moved[k] == FROM)
01376                         update_bb(inet, &bb_coord_new[bb_index],
01377                                   &bb_edge_new[bb_index], x_from, y_from,
01378                                   x_to, y_to);
01379                     else
01380                         update_bb(inet, &bb_coord_new[bb_index],
01381                                   &bb_edge_new[bb_index], x_to, y_to, x_from,
01382                                   y_from);
01383 #endif
01384                 }
01385 
01386             if(place_cost_type != NONLINEAR_CONG)
01387                 {
01388                     temp_net_cost[inet] =
01389                         get_net_cost(inet, &bb_coord_new[bb_index]);
01390                     bb_delta_c += temp_net_cost[inet] - net_cost[inet];
01391                 }
01392             else
01393                 {
01394                     /* Rip up, then replace with new bb. */
01395                     update_region_occ(inet, &bb_coords[inet], -1,
01396                                       num_regions);
01397                     update_region_occ(inet, &bb_coord_new[bb_index], 1,
01398                                       num_regions);
01399                 }
01400 
01401             bb_index++;
01402         }
01403 
01404     if(place_cost_type == NONLINEAR_CONG)
01405         {
01406             newcost = nonlinear_cong_cost(num_regions);
01407             bb_delta_c = newcost - *bb_cost;
01408         }
01409 
01410 
01411     if(place_algorithm == NET_TIMING_DRIVEN_PLACE ||
01412        place_algorithm == PATH_TIMING_DRIVEN_PLACE)
01413         {
01414             /*in this case we redefine delta_c as a combination of timing and bb.  *
01415              *additionally, we normalize all values, therefore delta_c is in       *
01416              *relation to 1*/
01417 
01418             comp_delta_td_cost(b_from, b_to, num_of_pins, &timing_delta_c,
01419                                &delay_delta_c);
01420 
01421             delta_c =
01422                 (1 - timing_tradeoff) * bb_delta_c * inverse_prev_bb_cost +
01423                 timing_tradeoff * timing_delta_c * inverse_prev_timing_cost;
01424         }
01425     else
01426         {
01427             delta_c = bb_delta_c;
01428         }
01429 
01430 
01431     keep_switch = assess_swap(delta_c, t);
01432 
01433     /* 1 -> move accepted, 0 -> rejected. */
01434 
01435     if(keep_switch)
01436         {
01437             *cost = *cost + delta_c;
01438             *bb_cost = *bb_cost + bb_delta_c;
01439 
01440 
01441             if(place_algorithm == NET_TIMING_DRIVEN_PLACE ||
01442                place_algorithm == PATH_TIMING_DRIVEN_PLACE)
01443                 {
01444                     /*update the point_to_point_timing_cost and point_to_point_delay_cost 
01445                      * values from the temporary values */
01446                     *timing_cost = *timing_cost + timing_delta_c;
01447                     *delay_cost = *delay_cost + delay_delta_c;
01448 
01449                     update_td_cost(b_from, b_to, num_of_pins);
01450                 }
01451 
01452             /* update net cost functions and reset flags. */
01453 
01454             bb_index = 0;
01455 
01456             for(k = 0; k < num_nets_affected; k++)
01457                 {
01458                     inet = nets_to_update[k];
01459 
01460                     /* If we swapped two blocks connected to the same net, its bounding box *
01461                      * doesn't change.                                                      */
01462 
01463                     if(net_block_moved[k] == FROM_AND_TO)
01464                         {
01465                             temp_net_cost[inet] = -1;
01466                             continue;
01467                         }
01468 
01469                     bb_coords[inet] = bb_coord_new[bb_index];
01470                     if(clb_net[inet].num_sinks >= SMALL_NET)
01471                         bb_num_on_edges[inet] = bb_edge_new[bb_index];
01472 
01473                     bb_index++;
01474 
01475                     net_cost[inet] = temp_net_cost[inet];
01476                     temp_net_cost[inet] = -1;
01477                 }
01478 
01479             /* Update clb data structures since we kept the move. */
01480             /* Swap physical location */
01481             grid[x_to][y_to].blocks[z_to] = b_from;
01482             grid[x_from][y_from].blocks[z_from] = b_to;
01483 
01484             if(EMPTY == b_to)
01485                 {               /* Moved to an empty location */
01486                     grid[x_to][y_to].usage++;
01487                     grid[x_from][y_from].usage--;
01488                 }
01489         }
01490 
01491     else
01492         {                       /* Move was rejected.  */
01493 
01494             /* Reset the net cost function flags first. */
01495             for(k = 0; k < num_nets_affected; k++)
01496                 {
01497                     inet = nets_to_update[k];
01498                     temp_net_cost[inet] = -1;
01499                 }
01500 
01501             /* Restore the block data structures to their state before the move. */
01502             block[b_from].x = x_from;
01503             block[b_from].y = y_from;
01504             block[b_from].z = z_from;
01505             if(b_to != EMPTY)
01506                 {
01507                     block[b_to].x = x_to;
01508                     block[b_to].y = y_to;
01509                     block[b_to].z = z_to;
01510                 }
01511 
01512             /* Restore the region occupancies to their state before the move. */
01513             if(place_cost_type == NONLINEAR_CONG)
01514                 {
01515                     restore_region_occ(old_region_occ_x, old_region_occ_y,
01516                                        num_regions);
01517                 }
01518         }
01519 
01520     return (keep_switch);
01521 }
01522 
01523 
01524 /** Saves the old occupancies of the placement subregions in case the  
01525  * current move is not accepted.  Used only for NONLINEAR_CONG.       
01526  */
01527 static void
01528 save_region_occ(float **old_region_occ_x,
01529                 float **old_region_occ_y,
01530                 int num_regions)
01531 {
01532     int i, j;
01533 
01534     for(i = 0; i < num_regions; i++)
01535         {
01536             for(j = 0; j < num_regions; j++)
01537                 {
01538                     old_region_occ_x[i][j] = place_region_x[i][j].occupancy;
01539                     old_region_occ_y[i][j] = place_region_y[i][j].occupancy;
01540                 }
01541         }
01542 }
01543 
01544 
01545 /** Restores the old occupancies of the placement subregions when the  
01546  * current move is not accepted.  Used only for NONLINEAR_CONG.       
01547  */
01548 static void
01549 restore_region_occ(float **old_region_occ_x,
01550                    float **old_region_occ_y,
01551                    int num_regions)
01552 {
01553 
01554     int i, j;
01555 
01556     for(i = 0; i < num_regions; i++)
01557         {
01558             for(j = 0; j < num_regions; j++)
01559                 {
01560                     place_region_x[i][j].occupancy = old_region_occ_x[i][j];
01561                     place_region_y[i][j].occupancy = old_region_occ_y[i][j];
01562                 }
01563         }
01564 }
01565 
01566 
01567 /*( Puts a list of all the nets connected to b_from and b_to into          
01568 * nets_to_update.  Returns the number of affected nets.  Net_block_moved 
01569 * is either FROM, TO or FROM_AND_TO -- the block connected to this net   
01570 * that has moved.                                                        
01571 */
01572 static int
01573 find_affected_nets(int *nets_to_update,
01574                    int *net_block_moved,
01575                    int b_from,
01576                    int b_to,
01577                    int num_of_pins)
01578 {
01579 
01580     int k, inet, affected_index, count;
01581 
01582     affected_index = 0;
01583 
01584     for(k = 0; k < num_of_pins; k++)
01585         {
01586             inet = block[b_from].nets[k];
01587 
01588             if(inet == OPEN)
01589                 continue;
01590 
01591             if(clb_net[inet].is_global)
01592                 continue;
01593 
01594             /* This is here in case the same block connects to a net twice. */
01595 
01596             if(temp_net_cost[inet] > 0.)
01597                 continue;
01598 
01599             nets_to_update[affected_index] = inet;
01600             net_block_moved[affected_index] = FROM;
01601             affected_index++;
01602             temp_net_cost[inet] = 1.;   /* Flag to say we've marked this net. */
01603         }
01604 
01605     if(b_to != EMPTY)
01606         {
01607             for(k = 0; k < num_of_pins; k++)
01608                 {
01609                     inet = block[b_to].nets[k];
01610 
01611                     if(inet == OPEN)
01612                         continue;
01613 
01614                     if(clb_net[inet].is_global)
01615                         continue;
01616 
01617                     if(temp_net_cost[inet] > 0.)
01618                         {       /* Net already marked. */
01619                             for(count = 0; count < affected_index; count++)
01620                                 {
01621                                     if(nets_to_update[count] == inet)
01622                                         {
01623                                             if(net_block_moved[count] == FROM)
01624                                                 net_block_moved[count] =
01625                                                     FROM_AND_TO;
01626                                             break;
01627                                         }
01628                                 }
01629 
01630 #ifdef DEBUG
01631                             if(count > affected_index)
01632                                 {
01633                                     printf
01634                                         ("Error in find_affected_nets -- count = %d,"
01635                                          " affected index = %d.\n", count,
01636                                          affected_index);
01637                                     exit(1);
01638                                 }
01639 #endif
01640                         }
01641 
01642                     else
01643                         {       /* Net not marked yet. */
01644 
01645                             nets_to_update[affected_index] = inet;
01646                             net_block_moved[affected_index] = TO;
01647                             affected_index++;
01648                             temp_net_cost[inet] = 1.;   /* Flag means we've  marked net. */
01649                         }
01650                 }
01651         }
01652 
01653     return (affected_index);
01654 }
01655 
01656 
01657 /** Returns the point to which I want to swap, properly range limited. 
01658  * rlim must always be between 1 and nx (inclusive) for this routine  
01659  * to work.  Assumes that a column only contains blocks of the same type.
01660  */
01661 static boolean
01662 find_to(int x_from,
01663         int y_from,
01664         t_type_ptr type,
01665         float rlim,
01666         int *x_lookup,
01667         int *x_to,
01668         int *y_to)
01669 {
01670 
01671     int x_rel, y_rel, iside, iplace, rlx, rly, min_x, max_x, min_y, max_y;
01672     int num_col_same_type, i, j;
01673 
01674     rlx = min(nx, rlim);        /* Only needed when nx < ny. */
01675     rly = min(ny, rlim);        /* Added rly for aspect_ratio != 1 case. */
01676 
01677     min_x = max(1, x_from - rlx);
01678     max_x = min(nx, x_from + rlx);
01679     min_y = max(1, y_from - rly);
01680     max_y = min(ny, y_from + rly);
01681 
01682     num_col_same_type = 0;
01683     j = 0;
01684     if(type != IO_TYPE)
01685         {
01686             for(i = min_x; i <= max_x; i++)
01687                 {
01688                     if(grid[i][1].type == type)
01689                         {
01690                             num_col_same_type++;
01691                             x_lookup[j] = i;
01692                             j++;
01693                         }
01694                 }
01695             assert(num_col_same_type != 0);
01696             if(num_col_same_type == 1 &&
01697                ((((max_y - min_y) / type->height) - 1) <= 0
01698                 || type->height > (ny / 2)))
01699                 return FALSE;
01700         }
01701 
01702 #ifdef DEBUG
01703     if(rlx < 1 || rlx > nx)
01704         {
01705             printf("Error in find_to: rlx = %d\n", rlx);
01706             exit(1);
01707         }
01708 #endif
01709 
01710     do
01711         {                       /* Until (x_to, y_to) different from (x_from, y_from) */
01712             if(type == IO_TYPE)
01713                 {               /* io_block to be moved. */
01714                     if(rlx >= nx)
01715                         {
01716                             iside = my_irand(3);
01717                             /*                              *
01718                              *       +-----1----+           *
01719                              *       |          |           *
01720                              *       |          |           *
01721                              *       0          2           *
01722                              *       |          |           *
01723                              *       |          |           *
01724                              *       +-----3----+           *
01725                              *                              */
01726                             switch (iside)
01727                                 {
01728                                 case 0:
01729                                     iplace = my_irand(ny - 1) + 1;
01730                                     *x_to = 0;
01731                                     *y_to = iplace;
01732                                     break;
01733                                 case 1:
01734                                     iplace = my_irand(nx - 1) + 1;
01735                                     *x_to = iplace;
01736                                     *y_to = ny + 1;
01737                                     break;
01738                                 case 2:
01739                                     iplace = my_irand(ny - 1) + 1;
01740                                     *x_to = nx + 1;
01741                                     *y_to = iplace;
01742                                     break;
01743                                 case 3:
01744                                     iplace = my_irand(nx - 1) + 1;
01745                                     *x_to = iplace;
01746                                     *y_to = 0;
01747                                     break;
01748                                 default:
01749                                     printf
01750                                         ("Error in find_to.  Unexpected io swap location.\n");
01751                                     exit(1);
01752                                 }
01753                         }
01754                     else
01755                         {       /* rlx is less than whole chip */
01756                             if(x_from == 0)
01757                                 {
01758                                     iplace = my_irand(2 * rly);
01759                                     *y_to = y_from - rly + iplace;
01760                                     *x_to = x_from;
01761                                     if(*y_to > ny)
01762                                         {
01763                                             *y_to = ny + 1;
01764                                             *x_to = my_irand(rlx - 1) + 1;
01765                                         }
01766                                     else if(*y_to < 1)
01767                                         {
01768                                             *y_to = 0;
01769                                             *x_to = my_irand(rlx - 1) + 1;
01770                                         }
01771                                 }
01772                             else if(x_from == nx + 1)
01773                                 {
01774                                     iplace = my_irand(2 * rly);
01775                                     *y_to = y_from - rly + iplace;
01776                                     *x_to = x_from;
01777                                     if(*y_to > ny)
01778                                         {
01779                                             *y_to = ny + 1;
01780                                             *x_to = nx - my_irand(rlx - 1);
01781                                         }
01782                                     else if(*y_to < 1)
01783                                         {
01784                                             *y_to = 0;
01785                                             *x_to = nx - my_irand(rlx - 1);
01786                                         }
01787                                 }
01788                             else if(y_from == 0)
01789                                 {
01790                                     iplace = my_irand(2 * rlx);
01791                                     *x_to = x_from - rlx + iplace;
01792                                     *y_to = y_from;
01793                                     if(*x_to > nx)
01794                                         {
01795                                             *x_to = nx + 1;
01796                                             *y_to = my_irand(rly - 1) + 1;
01797                                         }
01798                                     else if(*x_to < 1)
01799                                         {
01800                                             *x_to = 0;
01801                                             *y_to = my_irand(rly - 1) + 1;
01802                                         }
01803                                 }
01804                             else
01805                                 {       /* *y_from == ny + 1 */
01806                                     iplace = my_irand(2 * rlx);
01807                                     *x_to = x_from - rlx + iplace;
01808                                     *y_to = y_from;
01809                                     if(*x_to > nx)
01810                                         {
01811                                             *x_to = nx + 1;
01812                                             *y_to = ny - my_irand(rly - 1);
01813                                         }
01814                                     else if(*x_to < 1)
01815                                         {
01816                                             *x_to = 0;
01817                                             *y_to = ny - my_irand(rly - 1);
01818                                         }
01819                                 }
01820                         }       /* End rlx if */
01821                 }               /* end type if */
01822             else
01823                 {
01824                         if(nx == 1 && ny == 1) {
01825                                 return FALSE;
01826                         }
01827                     x_rel = my_irand(num_col_same_type - 1);
01828                     y_rel =
01829                         my_irand(max
01830                                  (0, ((max_y - min_y) / type->height) - 1));
01831                     *x_to = x_lookup[x_rel];
01832                     *y_to = min_y + y_rel * type->height;
01833                     *y_to = (*y_to) - grid[*x_to][*y_to].offset;        /* align it */
01834                     assert(*x_to >= 1 && *x_to <= nx);
01835                     assert(*y_to >= 1 && *y_to <= ny);                  
01836                 }
01837         }
01838     while((x_from == *x_to) && (y_from == *y_to));
01839 
01840 #ifdef DEBUG
01841     if(*x_to < 0 || *x_to > nx + 1 || *y_to < 0 || *y_to > ny + 1)
01842         {
01843             printf("Error in routine find_to:  (x_to,y_to) = (%d,%d)\n",
01844                    *x_to, *y_to);
01845             exit(1);
01846         }
01847 #endif
01848     assert(type == grid[*x_to][*y_to].type);
01849     return TRUE;
01850 }
01851 
01852 
01853 /** Returns: 1 -> move accepted, 0 -> rejected. */
01854 static int
01855 assess_swap(float delta_c,
01856             float t)
01857 {
01858     int accept;
01859     float prob_fac, fnum;
01860 
01861     if(delta_c <= 0)
01862         {
01863 
01864 #ifdef SPEC                     /* Reduce variation in final solution due to round off */
01865             fnum = my_frand();
01866 #endif
01867 
01868             accept = 1;
01869             return (accept);
01870         }
01871 
01872     if(t == 0.)
01873         return (0);
01874 
01875     fnum = my_frand();
01876     prob_fac = exp(-delta_c / t);
01877     if(prob_fac > fnum)
01878         {
01879             accept = 1;
01880         }
01881     else
01882         {
01883             accept = 0;
01884         }
01885     return (accept);
01886 }
01887 
01888 
01889 /** Recomputes the cost to eliminate roundoff that may have accrued.  
01890  * This routine does as little work as possible to compute this new  
01891  * cost.                                                             
01892  */
01893 static float
01894 recompute_bb_cost(int place_cost_type,
01895                   int num_regions)
01896 {
01897 
01898     int i, j, inet;
01899     float cost;
01900 
01901     cost = 0;
01902 
01903     /* Initialize occupancies to zero if regions are being used. */
01904 
01905     if(place_cost_type == NONLINEAR_CONG)
01906         {
01907             for(i = 0; i < num_regions; i++)
01908                 {
01909                     for(j = 0; j < num_regions; j++)
01910                         {
01911                             place_region_x[i][j].occupancy = 0.;
01912                             place_region_y[i][j].occupancy = 0.;
01913                         }
01914                 }
01915         }
01916 
01917     for(inet = 0; inet < num_nets; inet++)
01918         {                       /* for each net ... */
01919 
01920             if(clb_net[inet].is_global == FALSE)
01921                 {               /* Do only if not global. */
01922 
01923                     /* Bounding boxes don't have to be recomputed; they're correct. */
01924 
01925                     if(place_cost_type != NONLINEAR_CONG)
01926                         {
01927                             cost += net_cost[inet];
01928                         }
01929                     else
01930                         {       /* Must be nonlinear_cong case. */
01931                             update_region_occ(inet, &bb_coords[inet], 1,
01932                                               num_regions);
01933                         }
01934                 }
01935         }
01936 
01937     if(place_cost_type == NONLINEAR_CONG)
01938         {
01939             cost = nonlinear_cong_cost(num_regions);
01940         }
01941 
01942     return (cost);
01943 }
01944 
01945 
01946 /* *returns the delay of one point to point connection */
01947 static float
01948 comp_td_point_to_point_delay(int inet,
01949                              int ipin)
01950 {
01951     int source_block, sink_block;
01952     int delta_x, delta_y;
01953     t_type_ptr source_type, sink_type;
01954     float delay_source_to_sink;
01955 
01956     delay_source_to_sink = 0.;
01957 
01958     source_block = clb_net[inet].node_block[0];
01959     source_type = block[source_block].type;
01960 
01961     sink_block = clb_net[inet].node_block[ipin];
01962     sink_type = block[sink_block].type;
01963 
01964     assert(source_type != NULL);
01965     assert(sink_type != NULL);
01966 
01967     delta_x = abs(block[sink_block].x - block[source_block].x);
01968     delta_y = abs(block[sink_block].y - block[source_block].y);
01969 
01970     /* TODO low priority: Could be merged into one look-up table */
01971     /* Note: This heuristic is terrible on Quality of Results.  
01972      * A much better heuristic is to create a more comprehensive lookup table but
01973      * it's too late in the release cycle to do this.  Pushing until the next release */
01974     if(source_type == IO_TYPE)
01975         {
01976             if(sink_type == IO_TYPE)
01977                 delay_source_to_sink = delta_io_to_io[delta_x][delta_y];
01978             else
01979                 delay_source_to_sink = delta_io_to_clb[delta_x][delta_y];
01980         }
01981     else
01982         {
01983             if(sink_type == IO_TYPE)
01984                 delay_source_to_sink = delta_clb_to_io[delta_x][delta_y];
01985             else
01986                 delay_source_to_sink = delta_clb_to_clb[delta_x][delta_y];
01987         }
01988     if(delay_source_to_sink < 0)
01989         {
01990             printf
01991                 ("Error in comp_td_point_to_point_delay in place.c, bad delay_source_to_sink value\n");
01992             exit(1);
01993         }
01994 
01995     if(delay_source_to_sink < 0.)
01996         {
01997             printf
01998                 ("Error in comp_td_point_to_point_delay in place.c, delay is less than 0\n");
01999             exit(1);
02000         }
02001 
02002     return (delay_source_to_sink);
02003 }
02004 
02005 
02006 
02007 /** update the point_to_point_timing_cost values from the temporary 
02008  * values for all connections that have changed 
02009  */
02010 static void
02011 update_td_cost(int b_from,
02012                int b_to,
02013                int num_of_pins)
02014 {
02015     int blkpin, net_pin, inet, ipin;
02016 
02017     for(blkpin = 0; blkpin < num_of_pins; blkpin++)
02018         {
02019 
02020             inet = block[b_from].nets[blkpin];
02021 
02022             if(inet == OPEN)
02023                 continue;
02024 
02025             if(clb_net[inet].is_global)
02026                 continue;
02027 
02028             net_pin = net_pin_index[b_from][blkpin];
02029 
02030             if(net_pin != 0)
02031                 {
02032 
02033                     /*the following "if" prevents the value from being updated twice */
02034                     if(clb_net[inet].node_block[0] != b_to
02035                        && clb_net[inet].node_block[0] != b_from)
02036                         {
02037 
02038                             point_to_point_delay_cost[inet][net_pin] =
02039                                 temp_point_to_point_delay_cost[inet][net_pin];
02040                             temp_point_to_point_delay_cost[inet][net_pin] =
02041                                 -1;
02042 
02043                             point_to_point_timing_cost[inet][net_pin] =
02044                                 temp_point_to_point_timing_cost[inet]
02045                                 [net_pin];
02046                             temp_point_to_point_timing_cost[inet][net_pin] =
02047                                 -1;
02048                         }
02049                 }
02050             else
02051                 {               /*this net is being driven by a moved block, recompute */
02052                     /*all point to point connections on this net. */
02053                     for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
02054                         {
02055 
02056                             point_to_point_delay_cost[inet][ipin] =
02057                                 temp_point_to_point_delay_cost[inet][ipin];
02058                             temp_point_to_point_delay_cost[inet][ipin] = -1;
02059 
02060                             point_to_point_timing_cost[inet][ipin] =
02061                                 temp_point_to_point_timing_cost[inet][ipin];
02062                             temp_point_to_point_timing_cost[inet][ipin] = -1;
02063                         }
02064                 }
02065         }
02066 
02067     if(b_to != EMPTY)
02068         {
02069             for(blkpin = 0; blkpin < num_of_pins; blkpin++)
02070                 {
02071 
02072                     inet = block[b_to].nets[blkpin];
02073 
02074                     if(inet == OPEN)
02075                         continue;
02076 
02077                     if(clb_net[inet].is_global)
02078                         continue;
02079 
02080                     net_pin = net_pin_index[b_to][blkpin];
02081 
02082                     if(net_pin != 0)
02083                         {
02084 
02085                             /*the following "if" prevents the value from being updated 2x */
02086                             if(clb_net[inet].node_block[0] != b_to
02087                                && clb_net[inet].node_block[0] != b_from)
02088                                 {
02089 
02090                                     point_to_point_delay_cost[inet][net_pin] =
02091                                         temp_point_to_point_delay_cost[inet]
02092                                         [net_pin];
02093                                     temp_point_to_point_delay_cost[inet]
02094                                         [net_pin] = -1;
02095 
02096                                     point_to_point_timing_cost[inet][net_pin]
02097                                         =
02098                                         temp_point_to_point_timing_cost[inet]
02099                                         [net_pin];
02100                                     temp_point_to_point_timing_cost[inet]
02101                                         [net_pin] = -1;
02102                                 }
02103                         }
02104                     else
02105                         {       /*this net is being driven by a moved block, recompute */
02106                             /*all point to point connections on this net. */
02107                             for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
02108                                 {
02109 
02110                                     point_to_point_delay_cost[inet][ipin] =
02111                                         temp_point_to_point_delay_cost[inet]
02112                                         [ipin];
02113                                     temp_point_to_point_delay_cost[inet][ipin]
02114                                         = -1;
02115 
02116                                     point_to_point_timing_cost[inet][ipin] =
02117                                         temp_point_to_point_timing_cost[inet]
02118                                         [ipin];
02119                                     temp_point_to_point_timing_cost[inet]
02120                                         [ipin] = -1;
02121                                 }
02122                         }
02123                 }
02124         }
02125 }
02126 
02127 /** a net that is being driven by a moved block must have all of its  
02128  * sink timing costs recomputed. A net that is driving a moved block 
02129  * must only have the timing cost on the connection driving the input 
02130  * pin computed 
02131  */
02132 static void
02133 comp_delta_td_cost(int b_from,
02134                    int b_to,
02135                    int num_of_pins,
02136                    float *delta_timing,
02137                    float *delta_delay)
02138 {
02139 
02140     int inet, k, net_pin, ipin;
02141     float delta_timing_cost, delta_delay_cost, temp_delay;
02142 
02143     delta_timing_cost = 0.;
02144     delta_delay_cost = 0.;
02145 
02146 
02147     for(k = 0; k < num_of_pins; k++)
02148         {
02149             inet = block[b_from].nets[k];
02150 
02151             if(inet == OPEN)
02152                 continue;
02153 
02154             if(clb_net[inet].is_global)
02155                 continue;
02156 
02157             net_pin = net_pin_index[b_from][k];
02158 
02159             if(net_pin != 0)
02160                 {               /*this net is driving a moved block               */
02161 
02162                     /*if this net is being driven by a block that has moved, we do not  */
02163                     /*need to compute the change in the timing cost (here) since it will */
02164                     /*be computed in the fanout of the net on  the driving block, also  */
02165                     /*computing it here would double count the change, and mess up the  */
02166                     /*delta_timing_cost value */
02167                     if(clb_net[inet].node_block[0] != b_to
02168                        && clb_net[inet].node_block[0] != b_from)
02169                         {
02170                             temp_delay =
02171                                 comp_td_point_to_point_delay(inet, net_pin);
02172 
02173                             temp_point_to_point_delay_cost[inet][net_pin] =
02174                                 temp_delay;
02175                             temp_point_to_point_timing_cost[inet][net_pin] =
02176                                 timing_place_crit[inet][net_pin] * temp_delay;
02177 
02178                             delta_delay_cost +=
02179                                 temp_point_to_point_delay_cost[inet][net_pin]
02180                                 - point_to_point_delay_cost[inet][net_pin];
02181 
02182                             delta_timing_cost +=
02183                                 temp_point_to_point_timing_cost[inet][net_pin]
02184                                 - point_to_point_timing_cost[inet][net_pin];
02185                         }
02186                 }
02187             else
02188                 {               /*this net is being driven by a moved block, recompute */
02189                     /*all point to point connections on this net. */
02190                     for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
02191                         {
02192                             temp_delay =
02193                                 comp_td_point_to_point_delay(inet, ipin);
02194 
02195                             temp_point_to_point_delay_cost[inet][ipin] =
02196                                 temp_delay;
02197                             temp_point_to_point_timing_cost[inet][ipin] =
02198                                 timing_place_crit[inet][ipin] * temp_delay;
02199 
02200                             delta_delay_cost +=
02201                                 temp_point_to_point_delay_cost[inet][ipin] -
02202                                 point_to_point_delay_cost[inet][ipin];
02203 
02204                             delta_timing_cost +=
02205                                 temp_point_to_point_timing_cost[inet][ipin] -
02206                                 point_to_point_timing_cost[inet][ipin];
02207                         }
02208                 }
02209         }
02210 
02211     if(b_to != EMPTY)
02212         {
02213             for(k = 0; k < num_of_pins; k++)
02214                 {
02215                     inet = block[b_to].nets[k];
02216 
02217                     if(inet == OPEN)
02218                         continue;
02219 
02220                     if(clb_net[inet].is_global)
02221                         continue;
02222 
02223                     net_pin = net_pin_index[b_to][k];
02224 
02225                     if(net_pin != 0)
02226                         {       /*this net is driving a moved block */
02227 
02228                             /*if this net is being driven by a block that has moved, we do not */
02229                             /*need to compute the change in the timing cost (here) since it was */
02230                             /*computed in the fanout of the net on  the driving block, also    */
02231                             /*computing it here would double count the change, and mess up the */
02232                             /*delta_timing_cost value */
02233                             if(clb_net[inet].node_block[0] != b_to
02234                                && clb_net[inet].node_block[0] != b_from)
02235                                 {
02236                                     temp_delay =
02237                                         comp_td_point_to_point_delay(inet,
02238                                                                      net_pin);
02239 
02240                                     temp_point_to_point_delay_cost[inet]
02241                                         [net_pin] = temp_delay;
02242                                     temp_point_to_point_timing_cost[inet]
02243                                         [net_pin] =
02244                                         timing_place_crit[inet][net_pin] *
02245                                         temp_delay;
02246 
02247                                     delta_delay_cost +=
02248                                         temp_point_to_point_delay_cost[inet]
02249                                         [net_pin] -
02250                                         point_to_point_delay_cost[inet]
02251                                         [net_pin];
02252                                     delta_timing_cost +=
02253                                         temp_point_to_point_timing_cost[inet]
02254                                         [net_pin] -
02255                                         point_to_point_timing_cost[inet]
02256                                         [net_pin];
02257                                 }
02258                         }
02259                     else
02260                         {       /*this net is being driven by a moved block, recompute */
02261                             /*all point to point connections on this net. */
02262                             for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
02263                                 {
02264 
02265                                     temp_delay =
02266                                         comp_td_point_to_point_delay(inet,
02267                                                                      ipin);
02268 
02269                                     temp_point_to_point_delay_cost[inet][ipin]
02270                                         = temp_delay;
02271                                     temp_point_to_point_timing_cost[inet]
02272                                         [ipin] =
02273                                         timing_place_crit[inet][ipin] *
02274                                         temp_delay;
02275 
02276 
02277                                     delta_delay_cost +=
02278                                         temp_point_to_point_delay_cost[inet]
02279                                         [ipin] -
02280                                         point_to_point_delay_cost[inet][ipin];
02281                                     delta_timing_cost +=
02282                                         temp_point_to_point_timing_cost[inet]
02283                                         [ipin] -
02284                                         point_to_point_timing_cost[inet]
02285                                         [ipin];
02286                                 }
02287                         }
02288                 }
02289         }
02290 
02291     *delta_timing = delta_timing_cost;
02292     *delta_delay = delta_delay_cost;
02293 }
02294 
02295 /* computes the cost (from scratch) due to the delays and criticalities
02296  * on all point to point connections, we define the timing cost of     
02297  * each connection as criticality*delay 
02298  */
02299 static void
02300 comp_td_costs(float *timing_cost,
02301               float *connection_delay_sum)
02302 {
02303     int inet, ipin;
02304     float loc_timing_cost, loc_connection_delay_sum, temp_delay_cost,
02305         temp_timing_cost;
02306 
02307     loc_timing_cost = 0.;
02308     loc_connection_delay_sum = 0.;
02309 
02310     for(inet = 0; inet < num_nets; inet++)
02311         {                       /* for each net ... */
02312             if(clb_net[inet].is_global == FALSE)
02313                 {               /* Do only if not global. */
02314 
02315                     for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
02316                         {
02317 
02318                             temp_delay_cost =
02319                                 comp_td_point_to_point_delay(inet, ipin);
02320                             temp_timing_cost =
02321                                 temp_delay_cost *
02322                                 timing_place_crit[inet][ipin];
02323 
02324                             loc_connection_delay_sum += temp_delay_cost;
02325                             point_to_point_delay_cost[inet][ipin] =
02326                                 temp_delay_cost;
02327                             temp_point_to_point_delay_cost[inet][ipin] = -1;    /*undefined */
02328 
02329                             point_to_point_timing_cost[inet][ipin] =
02330                                 temp_timing_cost;
02331                             temp_point_to_point_timing_cost[inet][ipin] = -1;   /*undefined */
02332                             loc_timing_cost += temp_timing_cost;
02333                         }
02334                 }
02335         }
02336     *timing_cost = loc_timing_cost;
02337     *connection_delay_sum = loc_connection_delay_sum;
02338 }
02339 
02340 /** Finds the cost from scratch.  Done only when the placement  
02341  * has been radically changed (i.e. after initial placement).   
02342  * Otherwise find the cost change incrementally.  If method     
02343  * check is NORMAL, we find bounding boxes that are updateable  
02344  * for the larger nets.  If method is CHECK, all bounding boxes 
02345  * are found via the non_updateable_bb routine, to provide a    
02346  * cost which can be used to check the correctness of the       
02347  * other routine.                                               
02348  */
02349 static float
02350 comp_bb_cost(int method,
02351              int place_cost_type,
02352              int num_regions)
02353 {
02354 
02355     int i, j, k;
02356     float cost;
02357     double expected_wirelength;
02358 
02359     cost = 0;
02360     expected_wirelength = 0.0;
02361 
02362     /* Initialize occupancies to zero if regions are being used. */
02363 
02364     if(place_cost_type == NONLINEAR_CONG)
02365         {
02366             for(i = 0; i < num_regions; i++)
02367                 {
02368                     for(j = 0; j < num_regions; j++)
02369                         {
02370                             place_region_x[i][j].occupancy = 0.;
02371                             place_region_y[i][j].occupancy = 0.;
02372                         }
02373                 }
02374         }
02375 
02376     for(k = 0; k < num_nets; k++)
02377         {                       /* for each net ... */
02378 
02379             if(clb_net[k].is_global == FALSE)
02380                 {               /* Do only if not global. */
02381 
02382                     /* Small nets don't use incremental updating on their bounding boxes, *
02383                      * so they can use a fast bounding box calculator.                    */
02384 
02385                     if(clb_net[k].num_sinks >= SMALL_NET && method == NORMAL)
02386                         {
02387                                 get_non_updateable_bb(k, &bb_coords[k]);
02388 #if 0
02389                             get_bb_from_scratch(k, &bb_coords[k],
02390                                                 &bb_num_on_edges[k]);
02391 #endif
02392                         }
02393                     else
02394                         {
02395                             get_non_updateable_bb(k, &bb_coords[k]);
02396                         }
02397 
02398                     if(place_cost_type != NONLINEAR_CONG)
02399                         {
02400                             net_cost[k] = get_net_cost(k, &bb_coords[k]);
02401                             cost += net_cost[k];
02402                             if(method == CHECK)
02403                                 expected_wirelength +=
02404                                     get_net_wirelength_estimate(k,
02405                                                                 &bb_coords
02406                                                                 [k]);
02407                         }
02408                     else
02409                         {       /* Must be nonlinear_cong case. */
02410                             update_region_occ(k, &bb_coords[k], 1,
02411                                               num_regions);
02412                         }
02413                 }
02414         }
02415 
02416     if(place_cost_type == NONLINEAR_CONG)
02417         {
02418             cost = nonlinear_cong_cost(num_regions);
02419         }
02420 
02421     if(method == CHECK)
02422         printf("BB estimate of min-dist (placement) wirelength is ;%.0f\n",
02423                expected_wirelength);
02424 
02425     return (cost);
02426 }
02427 
02428 /** This routine computes the cost of a placement when the NONLINEAR_CONG 
02429  * option is selected.  It assumes that the occupancies of all the       
02430  * placement subregions have been properly updated, and simply           
02431  * computes the cost due to these occupancies by summing over all        
02432  * subregions.  This will be inefficient for moves that don't affect     
02433  * many subregions (i.e. small moves late in placement), esp. when there 
02434  * are a lot of subregions.  May recode later to update only affected    
02435  * subregions.                                                           
02436  */
02437 static float
02438 nonlinear_cong_cost(int num_regions)
02439 {
02440 
02441     float cost, tmp;
02442     int i, j;
02443 
02444     cost = 0.;
02445 
02446     for(i = 0; i < num_regions; i++)
02447         {
02448             for(j = 0; j < num_regions; j++)
02449                 {
02450 
02451                     /* Many different cost metrics possible.  1st try:  */
02452 
02453                     if(place_region_x[i][j].occupancy <
02454                        place_region_x[i][j].capacity)
02455                         {
02456                             cost += place_region_x[i][j].occupancy *
02457                                 place_region_x[i][j].inv_capacity;
02458                         }
02459                     else
02460                         {       /* Overused region -- penalize. */
02461 
02462                             tmp = place_region_x[i][j].occupancy *
02463                                 place_region_x[i][j].inv_capacity;
02464                             cost += tmp * tmp;
02465                         }
02466 
02467                     if(place_region_y[i][j].occupancy <
02468                        place_region_y[i][j].capacity)
02469                         {
02470                             cost += place_region_y[i][j].occupancy *
02471                                 place_region_y[i][j].inv_capacity;
02472                         }
02473                     else
02474                         {       /* Overused region -- penalize. */
02475 
02476                             tmp = place_region_y[i][j].occupancy *
02477                                 place_region_y[i][j].inv_capacity;
02478                             cost += tmp * tmp;
02479                         }
02480 
02481                 }
02482         }
02483 
02484     return (cost);
02485 }
02486 
02487 
02488 /** Called only when the place_cost_type is NONLINEAR_CONG.  If add_or_sub 
02489  * is 1, this uses the new net bounding box to increase the occupancy     
02490  * of some regions.  If add_or_sub = - 1, it decreases the occupancy      
02491  * by that due to this bounding box.                                      
02492  */
02493 static void
02494 update_region_occ(int inet,
02495                   struct s_bb *coords,
02496                   int add_or_sub,
02497                   int num_regions)
02498 {
02499 
02500     float net_xmin, net_xmax, net_ymin, net_ymax, crossing;
02501     float inv_region_len, inv_region_height;
02502     float inv_bb_len, inv_bb_height;
02503     float overlap_xlow, overlap_xhigh, overlap_ylow, overlap_yhigh;
02504     float y_overlap, x_overlap, x_occupancy, y_occupancy;
02505     int imin, imax, jmin, jmax, i, j;
02506 
02507     if(clb_net[inet].num_sinks >= 50)
02508         {
02509             crossing = 2.7933 + 0.02616 * ((clb_net[inet].num_sinks + 1) - 50);
02510         }
02511     else
02512         {
02513             crossing = cross_count[clb_net[inet].num_sinks];
02514         }
02515 
02516     net_xmin = coords->xmin - 0.5;
02517     net_xmax = coords->xmax + 0.5;
02518     net_ymin = coords->ymin - 0.5;
02519     net_ymax = coords->ymax + 0.5;
02520 
02521     /* I could precompute the two values below.  Should consider this. */
02522 
02523     inv_region_len = (float)num_regions / (float)nx;
02524     inv_region_height = (float)num_regions / (float)ny;
02525 
02526     /* Get integer coordinates defining the rectangular area in which the *
02527      * subregions have to be updated.  Formula is as follows:  subtract   *
02528      * 0.5 from net_xmin, etc. to get numbers from 0 to nx or ny;         *
02529      * divide by nx or ny to scale between 0 and 1; multiply by           *
02530      * num_regions to scale between 0 and num_regions; and truncate to    *
02531      * get the final answer.                                              */
02532 
02533     imin = (int)(net_xmin - 0.5) * inv_region_len;
02534     imax = (int)(net_xmax - 0.5) * inv_region_len;
02535     imax = min(imax, num_regions - 1);  /* Watch for weird roundoff */
02536 
02537     jmin = (int)(net_ymin - 0.5) * inv_region_height;
02538     jmax = (int)(net_ymax - 0.5) * inv_region_height;
02539     jmax = min(jmax, num_regions - 1);  /* Watch for weird roundoff */
02540 
02541     inv_bb_len = 1. / (net_xmax - net_xmin);
02542     inv_bb_height = 1. / (net_ymax - net_ymin);
02543 
02544     /* See RISA paper (ICCAD '94, pp. 690 - 695) for a description of why *
02545      * I use exactly this cost function.                                  */
02546 
02547     for(i = imin; i <= imax; i++)
02548         {
02549             for(j = jmin; j <= jmax; j++)
02550                 {
02551                     overlap_xlow = max(place_region_bounds_x[i], net_xmin);
02552                     overlap_xhigh =
02553                         min(place_region_bounds_x[i + 1], net_xmax);
02554                     overlap_ylow = max(place_region_bounds_y[j], net_ymin);
02555                     overlap_yhigh =
02556                         min(place_region_bounds_y[j + 1], net_ymax);
02557 
02558                     x_overlap = overlap_xhigh - overlap_xlow;
02559                     y_overlap = overlap_yhigh - overlap_ylow;
02560 
02561 #ifdef DEBUG
02562 
02563                     if(x_overlap < -0.001)
02564                         {
02565                             printf
02566                                 ("Error in update_region_occ:  x_overlap < 0"
02567                                  "\n inet = %d, overlap = %g\n", inet,
02568                                  x_overlap);
02569                         }
02570 
02571                     if(y_overlap < -0.001)
02572                         {
02573                             printf
02574                                 ("Error in update_region_occ:  y_overlap < 0"
02575                                  "\n inet = %d, overlap = %g\n", inet,
02576                                  y_overlap);
02577                         }
02578 #endif
02579 
02580 
02581                     x_occupancy =
02582                         crossing * y_overlap * x_overlap * inv_bb_height *
02583                         inv_region_len;
02584                     y_occupancy =
02585                         crossing * x_overlap * y_overlap * inv_bb_len *
02586                         inv_region_height;
02587 
02588                     place_region_x[i][j].occupancy +=
02589                         add_or_sub * x_occupancy;
02590                     place_region_y[i][j].occupancy +=
02591                         add_or_sub * y_occupancy;
02592                 }
02593         }
02594 
02595 }
02596 
02597 
02598 /** Frees the place_regions data structures needed by the NONLINEAR_CONG 
02599  * cost function.                                                       
02600  */
02601 static void
02602 free_place_regions(int num_regions)
02603 {
02604 
02605     free_matrix(place_region_x, 0, num_regions - 1, 0, sizeof(struct
02606                                                               s_place_region));
02607 
02608     free_matrix(place_region_y, 0, num_regions - 1, 0, sizeof(struct
02609                                                               s_place_region));
02610 
02611     free(place_region_bounds_x);
02612     free(place_region_bounds_y);
02613 }
02614 
02615 
02616 /** Frees the major structures needed by the placer (and not needed       
02617  * elsewhere).   
02618  */
02619 static void
02620 free_placement_structs(int place_cost_type,
02621                        int num_regions,
02622                        float **old_region_occ_x,
02623                        float **old_region_occ_y,
02624                        struct s_placer_opts placer_opts)
02625 {
02626     int inet;
02627 
02628     if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
02629        placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE ||
02630        placer_opts.enable_timing_computations)
02631         {
02632             for(inet = 0; inet < num_nets; inet++)
02633                 {
02634                     /*add one to the address since it is indexed from 1 not 0 */
02635 
02636                     point_to_point_delay_cost[inet]++;
02637                     free(point_to_point_delay_cost[inet]);
02638 
02639                     point_to_point_timing_cost[inet]++;
02640                     free(point_to_point_timing_cost[inet]);
02641 
02642                     temp_point_to_point_delay_cost[inet]++;
02643                     free(temp_point_to_point_delay_cost[inet]);
02644 
02645                     temp_point_to_point_timing_cost[inet]++;
02646                     free(temp_point_to_point_timing_cost[inet]);
02647                 }
02648             free(point_to_point_delay_cost);
02649             free(temp_point_to_point_delay_cost);
02650 
02651             free(point_to_point_timing_cost);
02652             free(temp_point_to_point_timing_cost);
02653 
02654             free_matrix(net_pin_index, 0, num_blocks - 1, 0, sizeof(int));
02655         }
02656 
02657 
02658     free(net_cost);
02659     free(temp_net_cost);
02660     free(bb_num_on_edges);
02661     free(bb_coords);
02662 
02663     net_cost = NULL;            /* Defensive coding. */
02664     temp_net_cost = NULL;
02665     bb_num_on_edges = NULL;
02666     bb_coords = NULL;
02667 
02668     if(place_cost_type == NONLINEAR_CONG)
02669         {
02670             free_place_regions(num_regions);
02671             free_matrix(old_region_occ_x, 0, num_regions - 1, 0,
02672                         sizeof(float));
02673             free_matrix(old_region_occ_y, 0, num_regions - 1, 0,
02674                         sizeof(float));
02675         }
02676 
02677     else if(place_cost_type == LINEAR_CONG)
02678         {
02679             free_fast_cost_update_structs();
02680         }
02681 }
02682 
02683 
02684 /** Allocates the major structures needed only by the placer, primarily for 
02685  * computing costs quickly and such.                                       
02686  */
02687 static void
02688 alloc_and_load_placement_structs(int place_cost_type,
02689                                  int num_regions,
02690                                  float place_cost_exp,
02691                                  float ***old_region_occ_x,
02692                                  float ***old_region_occ_y,
02693                                  struct s_placer_opts placer_opts)
02694 {
02695 
02696     int inet, ipin, max_pins_per_clb, i;
02697 
02698     max_pins_per_clb = 0;
02699     for(i = 0; i < num_types; i++)
02700         {
02701             max_pins_per_clb =
02702                 max(max_pins_per_clb, type_descriptors[i].num_pins);
02703         }
02704 
02705     if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
02706        placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE ||
02707        placer_opts.enable_timing_computations)
02708         {
02709             /*allocate structures associated with timing driven placement */
02710             /* [0..num_nets-1][1..num_pins-1]  */
02711             point_to_point_delay_cost =
02712                 (float **)my_malloc(num_nets * sizeof(float *));
02713             temp_point_to_point_delay_cost =
02714                 (float **)my_malloc(num_nets * sizeof(float *));
02715 
02716             point_to_point_timing_cost =
02717                 (float **)my_malloc(num_nets * sizeof(float *));
02718             temp_point_to_point_timing_cost =
02719                 (float **)my_malloc(num_nets * sizeof(float *));
02720 
02721             net_pin_index =
02722                 (int **)alloc_matrix(0, num_blocks - 1, 0,
02723                                      max_pins_per_clb - 1, sizeof(int));
02724 
02725             for(inet = 0; inet < num_nets; inet++)
02726                 {
02727 
02728                     /* in the following, subract one so index starts at *
02729                      * 1 instead of 0 */
02730                     point_to_point_delay_cost[inet] =
02731                         (float *)my_malloc(clb_net[inet].num_sinks *
02732                                            sizeof(float));
02733                     point_to_point_delay_cost[inet]--;
02734 
02735                     temp_point_to_point_delay_cost[inet] =
02736                         (float *)my_malloc(clb_net[inet].num_sinks *
02737                                            sizeof(float));
02738                     temp_point_to_point_delay_cost[inet]--;
02739 
02740                     point_to_point_timing_cost[inet] =
02741                         (float *)my_malloc(clb_net[inet].num_sinks *
02742                                            sizeof(float));
02743                     point_to_point_timing_cost[inet]--;
02744 
02745                     temp_point_to_point_timing_cost[inet] =
02746                         (float *)my_malloc(clb_net[inet].num_sinks *
02747                                            sizeof(float));
02748                     temp_point_to_point_timing_cost[inet]--;
02749                 }
02750             for(inet = 0; inet < num_nets; inet++)
02751                 {
02752                     for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
02753                         {
02754                             point_to_point_delay_cost[inet][ipin] = 0;
02755                             temp_point_to_point_delay_cost[inet][ipin] = 0;
02756                         }
02757                 }
02758         }
02759 
02760 
02761 
02762 
02763 
02764     net_cost = (float *)my_malloc(num_nets * sizeof(float));
02765     temp_net_cost = (float *)my_malloc(num_nets * sizeof(float));
02766 
02767     /* Used to store costs for moves not yet made and to indicate when a net's   *
02768      * cost has been recomputed. temp_net_cost[inet] < 0 means net's cost hasn't *
02769      * been recomputed.                                                          */
02770 
02771     for(inet = 0; inet < num_nets; inet++)
02772         temp_net_cost[inet] = -1.;
02773 
02774     bb_coords = (struct s_bb *)my_malloc(num_nets * sizeof(struct s_bb));
02775     bb_num_on_edges =
02776         (struct s_bb *)my_malloc(num_nets * sizeof(struct s_bb));
02777 
02778     /* Allocate storage for subregion data, if needed. */
02779 
02780     if(place_cost_type == NONLINEAR_CONG)
02781         {
02782             alloc_place_regions(num_regions);
02783             load_place_regions(num_regions);
02784             *old_region_occ_x = (float **)alloc_matrix(0, num_regions - 1, 0,
02785                                                        num_regions - 1,
02786                                                        sizeof(float));
02787             *old_region_occ_y =
02788                 (float **)alloc_matrix(0, num_regions - 1, 0, num_regions - 1,
02789                                        sizeof(float));
02790         }
02791     else
02792         {                       /* Shouldn't use them; crash hard if I do!   */
02793             *old_region_occ_x = NULL;
02794             *old_region_occ_y = NULL;
02795         }
02796 
02797     if(place_cost_type == LINEAR_CONG)
02798         {
02799             alloc_and_load_for_fast_cost_update(place_cost_exp);
02800         }
02801 }
02802 
02803 /** Allocates memory for the regional occupancy, cost, etc. counts 
02804  * kept when we're using the NONLINEAR_CONG placement cost        
02805  * function.                                                      
02806  */
02807 static void
02808 alloc_place_regions(int num_regions)
02809 {
02810 
02811     place_region_x =
02812         (struct s_place_region **)alloc_matrix(0, num_regions - 1, 0,
02813                                                num_regions - 1,
02814                                                sizeof(struct s_place_region));
02815 
02816     place_region_y =
02817         (struct s_place_region **)alloc_matrix(0, num_regions - 1, 0,
02818                                                num_regions - 1,
02819                                                sizeof(struct s_place_region));
02820 
02821     place_region_bounds_x = (float *)my_malloc((num_regions + 1) *
02822                                                sizeof(float));
02823 
02824     place_region_bounds_y = (float *)my_malloc((num_regions + 1) *
02825                                                sizeof(float));
02826 }
02827 
02828 /** Loads the capacity values in each direction for each of the placement 
02829  * regions.  The chip is divided into a num_regions x num_regions array. 
02830  */
02831 static void
02832 load_place_regions(int num_regions)
02833 {
02834 
02835     int i, j, low_block, high_block, rnum;
02836     float low_lim, high_lim, capacity, fac, block_capacity;
02837     float len_fac, height_fac;
02838 
02839     /* First load up horizontal channel capacities.  */
02840 
02841     for(j = 0; j < num_regions; j++)
02842         {
02843             capacity = 0.;
02844             low_lim = (float)j / (float)num_regions *ny + 1.;
02845             high_lim = (float)(j + 1) / (float)num_regions *ny;
02846 
02847             low_block = floor(low_lim);
02848             low_block = max(1, low_block);      /* Watch for weird roundoff effects. */
02849             high_block = ceil(high_lim);
02850             high_block = min(high_block, ny);
02851 
02852             block_capacity = (chan_width_x[low_block - 1] +
02853                               chan_width_x[low_block]) / 2.;
02854             if(low_block == 1)
02855                 block_capacity += chan_width_x[0] / 2.;
02856 
02857             fac = 1. - (low_lim - low_block);
02858             capacity += fac * block_capacity;
02859 
02860             for(rnum = low_block + 1; rnum < high_block; rnum++)
02861                 {
02862                     block_capacity =
02863                         (chan_width_x[rnum - 1] + chan_width_x[rnum]) / 2.;
02864                     capacity += block_capacity;
02865                 }
02866 
02867             block_capacity = (chan_width_x[high_block - 1] +
02868                               chan_width_x[high_block]) / 2.;
02869             if(high_block == ny)
02870                 block_capacity += chan_width_x[ny] / 2.;
02871 
02872             fac = 1. - (high_block - high_lim);
02873             capacity += fac * block_capacity;
02874 
02875             for(i = 0; i < num_regions; i++)
02876                 {
02877                     place_region_x[i][j].capacity = capacity;
02878                     place_region_x[i][j].inv_capacity = 1. / capacity;
02879                     place_region_x[i][j].occupancy = 0.;
02880                     place_region_x[i][j].cost = 0.;
02881                 }
02882         }
02883 
02884     /* Now load vertical channel capacities.  */
02885 
02886     for(i = 0; i < num_regions; i++)
02887         {
02888             capacity = 0.;
02889             low_lim = (float)i / (float)num_regions *nx + 1.;
02890             high_lim = (float)(i + 1) / (float)num_regions *nx;
02891 
02892             low_block = floor(low_lim);
02893             low_block = max(1, low_block);      /* Watch for weird roundoff effects. */
02894             high_block = ceil(high_lim);
02895             high_block = min(high_block, nx);
02896 
02897             block_capacity = (chan_width_y[low_block - 1] +
02898                               chan_width_y[low_block]) / 2.;
02899             if(low_block == 1)
02900                 block_capacity += chan_width_y[0] / 2.;
02901 
02902             fac = 1. - (low_lim - low_block);
02903             capacity += fac * block_capacity;
02904 
02905             for(rnum = low_block + 1; rnum < high_block; rnum++)
02906                 {
02907                     block_capacity =
02908                         (chan_width_y[rnum - 1] + chan_width_y[rnum]) / 2.;
02909                     capacity += block_capacity;
02910                 }
02911 
02912             block_capacity = (chan_width_y[high_block - 1] +
02913                               chan_width_y[high_block]) / 2.;
02914             if(high_block == nx)
02915                 block_capacity += chan_width_y[nx] / 2.;
02916 
02917             fac = 1. - (high_block - high_lim);
02918             capacity += fac * block_capacity;
02919 
02920             for(j = 0; j < num_regions; j++)
02921                 {
02922                     place_region_y[i][j].capacity = capacity;
02923                     place_region_y[i][j].inv_capacity = 1. / capacity;
02924                     place_region_y[i][j].occupancy = 0.;
02925                     place_region_y[i][j].cost = 0.;
02926                 }
02927         }
02928 
02929     /* Finally set up the arrays indicating the limits of each of the *
02930      * placement subregions.                                          */
02931 
02932     len_fac = (float)nx / (float)num_regions;
02933     height_fac = (float)ny / (float)num_regions;
02934 
02935     place_region_bounds_x[0] = 0.5;
02936     place_region_bounds_y[0] = 0.5;
02937 
02938     for(i = 1; i <= num_regions; i++)
02939         {
02940             place_region_bounds_x[i] = place_region_bounds_x[i - 1] + len_fac;
02941             place_region_bounds_y[i] =
02942                 place_region_bounds_y[i - 1] + height_fac;
02943         }
02944 }
02945 
02946 
02947 /** This routine finds the bounding box of each net from scratch (i.e.    
02948  * from only the block location information).  It updates both the       
02949  * coordinate and number of blocks on each edge information.  It         
02950  * should only be called when the bounding box information is not valid. 
02951  */
02952 static void
02953 get_bb_from_scratch(int inet,
02954                     struct s_bb *coords,
02955                     struct s_bb *num_on_edges)
02956 {
02957 
02958     int ipin, bnum, x, y, xmin, xmax, ymin, ymax;
02959     int xmin_edge, xmax_edge, ymin_edge, ymax_edge;
02960     int n_pins;
02961 
02962     n_pins = clb_net[inet].num_sinks + 1;
02963 
02964     x = block[clb_net[inet].node_block[0]].x;
02965         y = block[clb_net[inet].node_block[0]].y + block[clb_net[inet].node_block[0]].type->pin_height[clb_net[inet].node_block_pin[0]];
02966 
02967     x = max(min(x, nx), 1);
02968     y = max(min(y, ny), 1);
02969 
02970     xmin = x;
02971     ymin = y;
02972     xmax = x;
02973     ymax = y;
02974     xmin_edge = 1;
02975     ymin_edge = 1;
02976     xmax_edge = 1;
02977     ymax_edge = 1;
02978 
02979     for(ipin = 1; ipin < n_pins; ipin++)
02980         {
02981             bnum = clb_net[inet].node_block[ipin];
02982             x = block[bnum].x;
02983             y = block[bnum].y + block[clb_net[inet].node_block[ipin]].type->pin_height[clb_net[inet].node_block_pin[ipin]];
02984 
02985             /* Code below counts IO blocks as being within the 1..nx, 1..ny clb array. *
02986              * This is because channels do not go out of the 0..nx, 0..ny range, and   *
02987              * I always take all channels impinging on the bounding box to be within   *
02988              * that bounding box.  Hence, this "movement" of IO blocks does not affect *
02989              * the which channels are included within the bounding box, and it         *
02990              * simplifies the code a lot.                                              */
02991 
02992             x = max(min(x, nx), 1);
02993             y = max(min(y, ny), 1);
02994 
02995             if(x == xmin)
02996                 {
02997                     xmin_edge++;
02998                 }
02999             if(x == xmax)
03000                 {               /* Recall that xmin could equal xmax -- don't use else */
03001                     xmax_edge++;
03002                 }
03003             else if(x < xmin)
03004                 {
03005                     xmin = x;
03006                     xmin_edge = 1;
03007                 }
03008             else if(x > xmax)
03009                 {
03010                     xmax = x;
03011                     xmax_edge = 1;
03012                 }
03013 
03014             if(y == ymin)
03015                 {
03016                     ymin_edge++;
03017                 }
03018             if(y == ymax)
03019                 {
03020                     ymax_edge++;
03021                 }
03022             else if(y < ymin)
03023                 {
03024                     ymin = y;
03025                     ymin_edge = 1;
03026                 }
03027             else if(y > ymax)
03028                 {
03029                     ymax = y;
03030                     ymax_edge = 1;
03031                 }
03032         }
03033 
03034     /* Copy the coordinates and number on edges information into the proper   *
03035      * structures.                                                            */
03036 
03037     coords->xmin = xmin;
03038     coords->xmax = xmax;
03039     coords->ymin = ymin;
03040     coords->ymax = ymax;
03041 
03042     num_on_edges->xmin = xmin_edge;
03043     num_on_edges->xmax = xmax_edge;
03044     num_on_edges->ymin = ymin_edge;
03045     num_on_edges->ymax = ymax_edge;
03046 }
03047 
03048 
03049 /** WMF: Finds the estimate of wirelength due to one net by looking at   
03050  * its coordinate bounding box.                                         
03051  */
03052 static double
03053 get_net_wirelength_estimate(int inet,
03054                             struct s_bb *bbptr)
03055 {
03056 
03057     double ncost, crossing;
03058 
03059     /* Get the expected "crossing count" of a net, based on its number *
03060      * of pins.  Extrapolate for very large nets.                      */
03061 
03062     if(((clb_net[inet].num_sinks + 1) > 50) && ((clb_net[inet].num_sinks + 1) < 85))
03063         {
03064             crossing = 2.7933 + 0.02616 * ((clb_net[inet].num_sinks + 1) - 50);
03065         }
03066     else if((clb_net[inet].num_sinks + 1) >= 85)
03067         {
03068             crossing =
03069                 2.7933 + 0.011 * (clb_net[inet].num_sinks + 1) -
03070                 0.0000018 * (clb_net[inet].num_sinks + 1) * (clb_net[inet].num_sinks +
03071                                                          1);
03072         }
03073     else
03074         {
03075             crossing = cross_count[(clb_net[inet].num_sinks + 1) - 1];
03076         }
03077 
03078     /* Could insert a check for xmin == xmax.  In that case, assume  *
03079      * connection will be made with no bends and hence no x-cost.    *
03080      * Same thing for y-cost.                                        */
03081 
03082     /* Cost = wire length along channel * cross_count / average      *
03083      * channel capacity.   Do this for x, then y direction and add.  */
03084 
03085     ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing;
03086 
03087     ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing;
03088 
03089     return (ncost);
03090 }
03091 
03092 /** Finds the cost due to one net by looking at its coordinate bounding  
03093  * box.                                                                 
03094  */
03095 static float
03096 get_net_cost(int inet,
03097              struct s_bb *bbptr)
03098 {
03099 
03100     float ncost, crossing;
03101 
03102     /* Get the expected "crossing count" of a net, based on its number *
03103      * of pins.  Extrapolate for very large nets.                      */
03104 
03105     if((clb_net[inet].num_sinks + 1) > 50)
03106         {
03107             crossing = 2.7933 + 0.02616 * ((clb_net[inet].num_sinks + 1) - 50);
03108             /*    crossing = 3.0;    Old value  */
03109         }
03110     else
03111         {
03112             crossing = cross_count[(clb_net[inet].num_sinks + 1) - 1];
03113         }
03114 
03115     /* Could insert a check for xmin == xmax.  In that case, assume  *
03116      * connection will be made with no bends and hence no x-cost.    *
03117      * Same thing for y-cost.                                        */
03118 
03119     /* Cost = wire length along channel * cross_count / average      *
03120      * channel capacity.   Do this for x, then y direction and add.  */
03121 
03122     ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing *
03123         chanx_place_cost_fac[bbptr->ymax][bbptr->ymin - 1];
03124 
03125     ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing *
03126         chany_place_cost_fac[bbptr->xmax][bbptr->xmin - 1];
03127 
03128     return (ncost);
03129 }
03130 
03131 /** Finds the bounding box of a net and stores its coordinates in the  
03132  * bb_coord_new data structure.  This routine should only be called   
03133  * for small nets, since it does not determine enough information for 
03134  * the bounding box to be updated incrementally later.                
03135  * Currently assumes channels on both sides of the CLBs forming the   
03136  * edges of the bounding box can be used.  Essentially, I am assuming 
03137  * the pins always lie on the outside of the bounding box.            
03138  */
03139 static void
03140 get_non_updateable_bb(int inet,
03141                       struct s_bb *bb_coord_new)
03142 {
03143 
03144     int k, xmax, ymax, xmin, ymin, x, y;
03145 
03146     x = block[clb_net[inet].node_block[0]].x;
03147         y = block[clb_net[inet].node_block[0]].y + block[clb_net[inet].node_block[0]].type->pin_height[clb_net[inet].node_block_pin[0]];
03148 
03149     xmin = x;
03150     ymin = y;
03151     xmax = x;
03152     ymax = y;
03153 
03154     for(k = 1; k < (clb_net[inet].num_sinks + 1); k++)
03155         {
03156             x = block[clb_net[inet].node_block[k]].x;
03157             y = block[clb_net[inet].node_block[k]].y + block[clb_net[inet].node_block[k]].type->pin_height[clb_net[inet].node_block_pin[k]];
03158 
03159             if(x < xmin)
03160                 {
03161                     xmin = x;
03162                 }
03163             else if(x > xmax)
03164                 {
03165                     xmax = x;
03166                 }
03167 
03168             if(y < ymin)
03169                 {
03170                     ymin = y;
03171                 }
03172             else if(y > ymax)
03173                 {
03174                     ymax = y;
03175                 }
03176         }
03177 
03178     /* Now I've found the coordinates of the bounding box.  There are no *
03179      * channels beyond nx and ny, so I want to clip to that.  As well,   *
03180      * since I'll always include the channel immediately below and the   *
03181      * channel immediately to the left of the bounding box, I want to    *
03182      * clip to 1 in both directions as well (since minimum channel index *
03183      * is 0).  See route.c for a channel diagram.                        */
03184 
03185     bb_coord_new->xmin = max(min(xmin, nx), 1);
03186     bb_coord_new->ymin = max(min(ymin, ny), 1);
03187     bb_coord_new->xmax = max(min(xmax, nx), 1);
03188     bb_coord_new->ymax = max(min(ymax, ny), 1);
03189 }
03190 
03191 /** Updates the bounding box of a net by storing its coordinates in    
03192  * the bb_coord_new data structure and the number of blocks on each   
03193  * edge in the bb_edge_new data structure.  This routine should only  
03194  * be called for large nets, since it has some overhead relative to   
03195  * just doing a brute force bounding box calculation.  The bounding   
03196  * box coordinate and edge information for inet must be valid before  
03197  * this routine is called.                                            
03198  * Currently assumes channels on both sides of the CLBs forming the   
03199  * edges of the bounding box can be used.  Essentially, I am assuming 
03200  * the pins always lie on the outside of the bounding box.            
03201  */
03202 static void
03203 update_bb(int inet,
03204           struct s_bb *bb_coord_new,
03205           struct s_bb *bb_edge_new,
03206           int xold,
03207           int yold,
03208           int xnew,
03209           int ynew)
03210 {
03211 
03212     /* IO blocks are considered to be one cell in for simplicity. */
03213 
03214     xnew = max(min(xnew, nx), 1);
03215     ynew = max(min(ynew, ny), 1);
03216     xold = max(min(xold, nx), 1);
03217     yold = max(min(yold, ny), 1);
03218 
03219     /* Check if I can update the bounding box incrementally. */
03220 
03221     if(xnew < xold)
03222         {                       /* Move to left. */
03223 
03224             /* Update the xmax fields for coordinates and number of edges first. */
03225 
03226             if(xold == bb_coords[inet].xmax)
03227                 {               /* Old position at xmax. */
03228                     if(bb_num_on_edges[inet].xmax == 1)
03229                         {
03230                             get_bb_from_scratch(inet, bb_coord_new,
03231                                                 bb_edge_new);
03232                             return;
03233                         }
03234                     else
03235                         {
03236                             bb_edge_new->xmax =
03237                                 bb_num_on_edges[inet].xmax - 1;
03238                             bb_coord_new->xmax = bb_coords[inet].xmax;
03239                         }
03240                 }
03241 
03242             else
03243                 {               /* Move to left, old postion was not at xmax. */
03244                     bb_coord_new->xmax = bb_coords[inet].xmax;
03245                     bb_edge_new->xmax = bb_num_on_edges[inet].xmax;
03246                 }
03247 
03248             /* Now do the xmin fields for coordinates and number of edges. */
03249 
03250             if(xnew < bb_coords[inet].xmin)
03251                 {               /* Moved past xmin */
03252                     bb_coord_new->xmin = xnew;
03253                     bb_edge_new->xmin = 1;
03254                 }
03255 
03256             else if(xnew == bb_coords[inet].xmin)
03257                 {               /* Moved to xmin */
03258                     bb_coord_new->xmin = xnew;
03259                     bb_edge_new->xmin = bb_num_on_edges[inet].xmin + 1;
03260                 }
03261 
03262             else
03263                 {               /* Xmin unchanged. */
03264                     bb_coord_new->xmin = bb_coords[inet].xmin;
03265                     bb_edge_new->xmin = bb_num_on_edges[inet].xmin;
03266                 }
03267         }
03268 
03269     /* End of move to left case. */
03270     else if(xnew > xold)
03271         {                       /* Move to right. */
03272 
03273             /* Update the xmin fields for coordinates and number of edges first. */
03274 
03275             if(xold == bb_coords[inet].xmin)
03276                 {               /* Old position at xmin. */
03277                     if(bb_num_on_edges[inet].xmin == 1)
03278                         {
03279                             get_bb_from_scratch(inet, bb_coord_new,
03280                                                 bb_edge_new);
03281                             return;
03282                         }
03283                     else
03284                         {
03285                             bb_edge_new->xmin =
03286                                 bb_num_on_edges[inet].xmin - 1;
03287                             bb_coord_new->xmin = bb_coords[inet].xmin;
03288                         }
03289                 }
03290 
03291             else
03292                 {               /* Move to right, old position was not at xmin. */
03293                     bb_coord_new->xmin = bb_coords[inet].xmin;
03294                     bb_edge_new->xmin = bb_num_on_edges[inet].xmin;
03295                 }
03296 
03297             /* Now do the xmax fields for coordinates and number of edges. */
03298 
03299             if(xnew > bb_coords[inet].xmax)
03300                 {               /* Moved past xmax. */
03301                     bb_coord_new->xmax = xnew;
03302                     bb_edge_new->xmax = 1;
03303                 }
03304 
03305             else if(xnew == bb_coords[inet].xmax)
03306                 {               /* Moved to xmax */
03307                     bb_coord_new->xmax = xnew;
03308                     bb_edge_new->xmax = bb_num_on_edges[inet].xmax + 1;
03309                 }
03310 
03311             else
03312                 {               /* Xmax unchanged. */
03313                     bb_coord_new->xmax = bb_coords[inet].xmax;
03314                     bb_edge_new->xmax = bb_num_on_edges[inet].xmax;
03315                 }
03316         }
03317     /* End of move to right case. */
03318     else
03319         {                       /* xnew == xold -- no x motion. */
03320             bb_coord_new->xmin = bb_coords[inet].xmin;
03321             bb_coord_new->xmax = bb_coords[inet].xmax;
03322             bb_edge_new->xmin = bb_num_on_edges[inet].xmin;
03323             bb_edge_new->xmax = bb_num_on_edges[inet].xmax;
03324         }
03325 
03326     /* Now account for the y-direction motion. */
03327 
03328     if(ynew < yold)
03329         {                       /* Move down. */
03330 
03331             /* Update the ymax fields for coordinates and number of edges first. */
03332 
03333             if(yold == bb_coords[inet].ymax)
03334                 {               /* Old position at ymax. */
03335                     if(bb_num_on_edges[inet].ymax == 1)
03336                         {
03337                             get_bb_from_scratch(inet, bb_coord_new,
03338                                                 bb_edge_new);
03339                             return;
03340                         }
03341                     else
03342                         {
03343                             bb_edge_new->ymax =
03344                                 bb_num_on_edges[inet].ymax - 1;
03345                             bb_coord_new->ymax = bb_coords[inet].ymax;
03346                         }
03347                 }
03348 
03349             else
03350                 {               /* Move down, old postion was not at ymax. */
03351                     bb_coord_new->ymax = bb_coords[inet].ymax;
03352                     bb_edge_new->ymax = bb_num_on_edges[inet].ymax;
03353                 }
03354 
03355             /* Now do the ymin fields for coordinates and number of edges. */
03356 
03357             if(ynew < bb_coords[inet].ymin)
03358                 {               /* Moved past ymin */
03359                     bb_coord_new->ymin = ynew;
03360                     bb_edge_new->ymin = 1;
03361                 }
03362 
03363             else if(ynew == bb_coords[inet].ymin)
03364                 {               /* Moved to ymin */
03365                     bb_coord_new->ymin = ynew;
03366                     bb_edge_new->ymin = bb_num_on_edges[inet].ymin + 1;
03367                 }
03368 
03369             else
03370                 {               /* ymin unchanged. */
03371                     bb_coord_new->ymin = bb_coords[inet].ymin;
03372                     bb_edge_new->ymin = bb_num_on_edges[inet].ymin;
03373                 }
03374         }
03375     /* End of move down case. */
03376     else if(ynew > yold)
03377         {                       /* Moved up. */
03378 
03379             /* Update the ymin fields for coordinates and number of edges first. */
03380 
03381             if(yold == bb_coords[inet].ymin)
03382                 {               /* Old position at ymin. */
03383                     if(bb_num_on_edges[inet].ymin == 1)
03384                         {
03385                             get_bb_from_scratch(inet, bb_coord_new,
03386                                                 bb_edge_new);
03387                             return;
03388                         }
03389                     else
03390                         {
03391                             bb_edge_new->ymin =
03392                                 bb_num_on_edges[inet].ymin - 1;
03393                             bb_coord_new->ymin = bb_coords[inet].ymin;
03394                         }
03395                 }
03396 
03397             else
03398                 {               /* Moved up, old position was not at ymin. */
03399                     bb_coord_new->ymin = bb_coords[inet].ymin;
03400                     bb_edge_new->ymin = bb_num_on_edges[inet].ymin;
03401                 }
03402 
03403             /* Now do the ymax fields for coordinates and number of edges. */
03404 
03405             if(ynew > bb_coords[inet].ymax)
03406                 {               /* Moved past ymax. */
03407                     bb_coord_new->ymax = ynew;
03408                     bb_edge_new->ymax = 1;
03409                 }
03410 
03411             else if(ynew == bb_coords[inet].ymax)
03412                 {               /* Moved to ymax */
03413                     bb_coord_new->ymax = ynew;
03414                     bb_edge_new->ymax = bb_num_on_edges[inet].ymax + 1;
03415                 }
03416 
03417             else
03418                 {               /* ymax unchanged. */
03419                     bb_coord_new->ymax = bb_coords[inet].ymax;
03420                     bb_edge_new->ymax = bb_num_on_edges[inet].ymax;
03421                 }
03422         }
03423     /* End of move up case. */
03424     else
03425         {                       /* ynew == yold -- no y motion. */
03426             bb_coord_new->ymin = bb_coords[inet].ymin;
03427             bb_coord_new->ymax = bb_coords[inet].ymax;
03428             bb_edge_new->ymin = bb_num_on_edges[inet].ymin;
03429             bb_edge_new->ymax = bb_num_on_edges[inet].ymax;
03430         }
03431 }
03432 
03433 
03434 /** Randomly places the blocks to create an initial placement.     */
03435 static void
03436 initial_placement(enum e_pad_loc_type pad_loc_type,
03437                   char *pad_loc_file)
03438 {
03439 
03440     struct s_pos
03441     {
03442         int x;
03443         int y;
03444         int z;
03445     }
03446     **pos;                      /* [0..num_types-1][0..type_tsize - 1] */
03447     int i, j, k, iblk, choice, type_index, x, y, z;
03448     int *count, *index;         /* [0..num_types-1] */
03449 
03450     pos = (struct s_pos **)my_malloc(num_types * sizeof(struct s_pos *));
03451     count = (int *)my_calloc(num_types, sizeof(int));
03452     index = (int *)my_calloc(num_types, sizeof(int));
03453 
03454     /* Initialize all occupancy to zero. */
03455 
03456     for(i = 0; i <= nx + 1; i++)
03457         {
03458             for(j = 0; j <= ny + 1; j++)
03459                 {
03460                     grid[i][j].usage = 0;
03461                     for(k = 0; k < grid[i][j].type->capacity; k++)
03462                         {
03463                             grid[i][j].blocks[k] = EMPTY;
03464                             if(grid[i][j].offset == 0)
03465                                 {
03466                                     count[grid[i][j].type->index]++;
03467                                 }
03468                         }
03469                 }
03470         }
03471 
03472     for(i = 0; i < num_types; i++)
03473         {
03474             pos[i] =
03475                 (struct s_pos *)my_malloc(count[i] * sizeof(struct s_pos));
03476         }
03477 
03478     for(i = 0; i <= nx + 1; i++)
03479         {
03480             for(j = 0; j <= ny + 1; j++)
03481                 {
03482                     for(k = 0; k < grid[i][j].type->capacity; k++)
03483                         {
03484                             if(grid[i][j].offset == 0)
03485                                 {
03486                                     type_index = grid[i][j].type->index;
03487                                     pos[type_index][index[type_index]].x = i;
03488                                     pos[type_index][index[type_index]].y = j;
03489                                     pos[type_index][index[type_index]].z = k;
03490                                     index[type_index]++;
03491                                 }
03492                         }
03493                 }
03494         }
03495 
03496     for(iblk = 0; iblk < num_blocks; iblk++)
03497         {
03498             /* Don't do IOs if the user specifies IOs */
03499             if(!(block[iblk].type == IO_TYPE && pad_loc_type == USER))
03500                 {
03501                     type_index = block[iblk].type->index;
03502                     assert(count[type_index] > 0);
03503                     choice = my_irand(count[type_index] - 1);
03504                     x = pos[type_index][choice].x;
03505                     y = pos[type_index][choice].y;
03506                     z = pos[type_index][choice].z;
03507                     grid[x][y].blocks[z] = iblk;
03508                     grid[x][y].usage++;
03509 
03510                     /* Ensure randomizer doesn't pick this block again */
03511                     pos[type_index][choice] = pos[type_index][count[type_index] - 1];   /* overwrite used block position */
03512                     count[type_index]--;
03513                 }
03514         }
03515 
03516     if(pad_loc_type == USER)
03517         {
03518             read_user_pad_loc(pad_loc_file);
03519         }
03520 
03521     /* All the blocks are placed now.  Make the block array agree with the    *
03522      * clb array.                                                             */
03523 
03524     for(i = 0; i <= (nx + 1); i++)
03525         {
03526             for(j = 0; j <= (ny + 1); j++)
03527                 {
03528                     for(k = 0; k < grid[i][j].type->capacity; k++)
03529                         {
03530                             assert(grid[i][j].blocks != NULL);
03531                             iblk = grid[i][j].blocks[k];
03532 
03533                             if(iblk != EMPTY)
03534                                 {
03535                                     block[iblk].x = i;
03536                                     block[iblk].y = j;
03537                                     block[iblk].z = k;
03538                                 }
03539                         }
03540                 }
03541         }
03542 
03543 #ifdef VERBOSE
03544     printf("At end of initial_placement.\n");
03545     dump_clbs();
03546 #endif
03547     for(i = 0; i < num_types; i++)
03548         {
03549             free(pos[i]);
03550         }
03551     free(pos);                  /* Free the mapping list */
03552     free(index);
03553     free(count);
03554 }
03555 
03556 /** Frees the structures used to speed up evaluation of the nonlinear   
03557  * congestion cost function.                                           
03558  */
03559 static void
03560 free_fast_cost_update_structs(void)
03561 {
03562 
03563     int i;
03564 
03565     for(i = 0; i <= ny; i++)
03566         free(chanx_place_cost_fac[i]);
03567 
03568     free(chanx_place_cost_fac);
03569 
03570     for(i = 0; i <= nx; i++)
03571         free(chany_place_cost_fac[i]);
03572 
03573     free(chany_place_cost_fac);
03574 }
03575 
03576 /** Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac 
03577  * arrays with the inverse of the average number of tracks per channel   
03578  * between [subhigh] and [sublow].  This is only useful for the cost     
03579  * function that takes the length of the net bounding box in each        
03580  * dimension divided by the average number of tracks in that direction.  
03581  * For other cost functions, you don't have to bother calling this       
03582  * routine; when using the cost function described above, however, you   
03583  * must always call this routine after you call init_chan and before     
03584  * you do any placement cost determination.  The place_cost_exp factor   
03585  * specifies to what power the width of the channel should be taken --   
03586  * larger numbers make narrower channels more expensive.                 
03587  */
03588 static void
03589 alloc_and_load_for_fast_cost_update(float place_cost_exp)
03590 {
03591 
03592     int low, high, i;
03593 
03594     /* Access arrays below as chan?_place_cost_fac[subhigh][sublow].  Since   *
03595      * subhigh must be greater than or equal to sublow, we only need to       *
03596      * allocate storage for the lower half of a matrix.                       */
03597 
03598     chanx_place_cost_fac = (float **)my_malloc((ny + 1) * sizeof(float *));
03599     for(i = 0; i <= ny; i++)
03600         chanx_place_cost_fac[i] = (float *)my_malloc((i + 1) * sizeof(float));
03601 
03602     chany_place_cost_fac = (float **)my_malloc((nx + 1) * sizeof(float *));
03603     for(i = 0; i <= nx; i++)
03604         chany_place_cost_fac[i] = (float *)my_malloc((i + 1) * sizeof(float));
03605 
03606 
03607     /* First compute the number of tracks between channel high and channel *
03608      * low, inclusive, in an efficient manner.                             */
03609 
03610     chanx_place_cost_fac[0][0] = chan_width_x[0];
03611 
03612     for(high = 1; high <= ny; high++)
03613         {
03614             chanx_place_cost_fac[high][high] = chan_width_x[high];
03615             for(low = 0; low < high; low++)
03616                 {
03617                     chanx_place_cost_fac[high][low] =
03618                         chanx_place_cost_fac[high - 1][low] +
03619                         chan_width_x[high];
03620                 }
03621         }
03622 
03623     /* Now compute the inverse of the average number of tracks per channel *
03624      * between high and low.  The cost function divides by the average     *
03625      * number of tracks per channel, so by storing the inverse I convert   *
03626      * this to a faster multiplication.  Take this final number to the     *
03627      * place_cost_exp power -- numbers other than one mean this is no      *
03628      * longer a simple "average number of tracks"; it is some power of     *
03629      * that, allowing greater penalization of narrow channels.             */
03630 
03631     for(high = 0; high <= ny; high++)
03632         for(low = 0; low <= high; low++)
03633             {
03634                 chanx_place_cost_fac[high][low] = (high - low + 1.) /
03635                     chanx_place_cost_fac[high][low];
03636                 chanx_place_cost_fac[high][low] =
03637                     pow((double)chanx_place_cost_fac[high][low],
03638                         (double)place_cost_exp);
03639             }
03640 
03641 
03642     /* Now do the same thing for the y-directed channels.  First get the  *
03643      * number of tracks between channel high and channel low, inclusive.  */
03644 
03645     chany_place_cost_fac[0][0] = chan_width_y[0];
03646 
03647     for(high = 1; high <= nx; high++)
03648         {
03649             chany_place_cost_fac[high][high] = chan_width_y[high];
03650             for(low = 0; low < high; low++)
03651                 {
03652                     chany_place_cost_fac[high][low] =
03653                         chany_place_cost_fac[high - 1][low] +
03654                         chan_width_y[high];
03655                 }
03656         }
03657 
03658     /* Now compute the inverse of the average number of tracks per channel * 
03659      * between high and low.  Take to specified power.                     */
03660 
03661     for(high = 0; high <= nx; high++)
03662         for(low = 0; low <= high; low++)
03663             {
03664                 chany_place_cost_fac[high][low] = (high - low + 1.) /
03665                     chany_place_cost_fac[high][low];
03666                 chany_place_cost_fac[high][low] =
03667                     pow((double)chany_place_cost_fac[high][low],
03668                         (double)place_cost_exp);
03669             }
03670 }
03671 
03672 /** Checks that the placement has not confused our data structures. 
03673  * i.e. the clb and block structures agree about the locations of  
03674  * every block, blocks are in legal spots, etc.  Also recomputes   
03675  * the final placement cost from scratch and makes sure it is      
03676  * within roundoff of what we think the cost is.                   
03677  */
03678 static void
03679 check_place(float bb_cost,
03680             float timing_cost,
03681             int place_cost_type,
03682             int num_regions,
03683             enum e_place_algorithm place_algorithm,
03684             float delay_cost)
03685 {
03686 
03687     static int *bdone;
03688     int i, j, k, error = 0, bnum;
03689     float bb_cost_check;
03690     int usage_check;
03691     float timing_cost_check, delay_cost_check;
03692 
03693     bb_cost_check = comp_bb_cost(CHECK, place_cost_type, num_regions);
03694     printf("bb_cost recomputed from scratch is %g.\n", bb_cost_check);
03695     if(fabs(bb_cost_check - bb_cost) > bb_cost * ERROR_TOL)
03696         {
03697             printf
03698                 ("Error:  bb_cost_check: %g and bb_cost: %g differ in check_place.\n",
03699                  bb_cost_check, bb_cost);
03700             error++;
03701         }
03702 
03703     if(place_algorithm == NET_TIMING_DRIVEN_PLACE ||
03704        place_algorithm == PATH_TIMING_DRIVEN_PLACE)
03705         {
03706             comp_td_costs(&timing_cost_check, &delay_cost_check);
03707             printf("timing_cost recomputed from scratch is %g. \n",
03708                    timing_cost_check);
03709             if(fabs(timing_cost_check - timing_cost) >
03710                timing_cost * ERROR_TOL)
03711                 {
03712                     printf("Error:  timing_cost_check: %g and timing_cost: "
03713                            "%g differ in check_place.\n",
03714                            timing_cost_check, timing_cost);
03715                     error++;
03716                 }
03717             printf("delay_cost recomputed from scratch is %g. \n",
03718                    delay_cost_check);
03719             if(fabs(delay_cost_check - delay_cost) > delay_cost * ERROR_TOL)
03720                 {
03721                     printf("Error:  delay_cost_check: %g and delay_cost: "
03722                            "%g differ in check_place.\n",
03723                            delay_cost_check, delay_cost);
03724                     error++;
03725                 }
03726         }
03727 
03728     bdone = (int *)my_malloc(num_blocks * sizeof(int));
03729     for(i = 0; i < num_blocks; i++)
03730         bdone[i] = 0;
03731 
03732     /* Step through grid array. Check it against block array. */
03733     for(i = 0; i <= (nx + 1); i++)
03734         for(j = 0; j <= (ny + 1); j++)
03735             {
03736                 if(grid[i][j].usage > grid[i][j].type->capacity)
03737                     {
03738                         printf
03739                             ("Error:  block at grid location (%d,%d) overused. "
03740                              "Usage is %d\n", i, j, grid[i][j].usage);
03741                         error++;
03742                     }
03743                 usage_check = 0;
03744                 for(k = 0; k < grid[i][j].type->capacity; k++)
03745                     {
03746                         bnum = grid[i][j].blocks[k];
03747                         if(EMPTY == bnum)
03748                             continue;
03749 
03750                         if(block[bnum].type != grid[i][j].type)
03751                             {
03752                                 printf
03753                                     ("Error:  block %d type does not match grid location (%d,%d) type.\n",
03754                                      bnum, i, j);
03755                                 error++;
03756                             }
03757                         if((block[bnum].x != i) || (block[bnum].y != j))
03758                             {
03759                                 printf
03760                                     ("Error:  block %d location conflicts with grid(%d,%d)"
03761                                      "data.\n", bnum, i, j);
03762                                 error++;
03763                             }
03764                         ++usage_check;
03765                         bdone[bnum]++;
03766                     }
03767                 if(usage_check != grid[i][j].usage)
03768                     {
03769                         printf
03770                             ("Error:  Location (%d,%d) usage is %d, but has actual usage %d.\n",
03771                              i, j, grid[i][j].usage, usage_check);
03772                     }
03773             }
03774 
03775     /* Check that every block exists in the grid and block arrays somewhere. */
03776     for(i = 0; i < num_blocks; i++)
03777         if(bdone[i] != 1)
03778             {
03779                 printf
03780                     ("Error:  block %d listed %d times in data structures.\n",
03781                      i, bdone[i]);
03782                 error++;
03783             }
03784     free(bdone);
03785 
03786     if(error == 0)
03787         {
03788             printf
03789                 ("\nCompleted placement consistency check successfully.\n\n");
03790 #ifdef PRINT_REL_POS_DISTR
03791             print_relative_pos_distr();
03792 #endif
03793         }
03794     else
03795         {
03796             printf
03797                 ("\nCompleted placement consistency check, %d Errors found.\n\n",
03798                  error);
03799             printf("Aborting program.\n");
03800             exit(1);
03801         }
03802 }