#include <stdio.h>
#include <math.h>
#include <assert.h>
#include "util.h"
#include "vpr_types.h"
#include "globals.h"
#include "mst.h"
#include "place.h"
#include "read_place.h"
#include "draw.h"
#include "place_and_route.h"
#include "net_delay.h"
#include "path_delay.h"
#include "timing_place_lookup.h"
#include "timing_place.h"
#include "place_stats.h"
Go to the source code of this file.
Defines | |
#define | SMALL_NET 4 |
#define | FROM 0 |
#define | TO 1 |
#define | FROM_AND_TO 2 |
#define | ERROR_TOL .0025 |
#define | MAX_MOVES_BEFORE_RECOMPUTE 50000 |
Enumerations | |
enum | cost_methods { NORMAL, CHECK } |
Functions | |
static void | alloc_and_load_unique_pin_list (void) |
static void | free_unique_pin_list (void) |
static void | alloc_place_regions (int num_regions) |
static void | load_place_regions (int num_regions) |
static void | free_place_regions (int num_regions) |
static void | alloc_and_load_placement_structs (int place_cost_type, int num_regions, float place_cost_exp, float ***old_region_occ_x, float ***old_region_occ_y, struct s_placer_opts placer_opts) |
static void | free_placement_structs (int place_cost_type, int num_regions, float **old_region_occ_x, float **old_region_occ_y, struct s_placer_opts placer_opts) |
static void | alloc_and_load_for_fast_cost_update (float place_cost_exp) |
static void | initial_placement (enum e_pad_loc_type pad_loc_type, char *pad_loc_file) |
static float | comp_bb_cost (int method, int place_cost_type, int num_regions) |
static int | try_swap (float t, float *cost, float *bb_cost, float *timing_cost, float rlim, int place_cost_type, float **old_region_occ_x, float **old_region_occ_y, int num_regions, boolean fixed_pins, enum e_place_algorithm place_algorithm, float timing_tradeoff, float inverse_prev_bb_cost, float inverse_prev_timing_cost, float *delay_cost, int *x_lookup) |
static void | check_place (float bb_cost, float timing_cost, int place_cost_type, int num_regions, enum e_place_algorithm place_algorithm, float delay_cost) |
static float | starting_t (float *cost_ptr, float *bb_cost_ptr, float *timing_cost_ptr, int place_cost_type, float **old_region_occ_x, float **old_region_occ_y, int num_regions, boolean fixed_pins, struct s_annealing_sched annealing_sched, int max_moves, float rlim, enum e_place_algorithm place_algorithm, float timing_tradeoff, float inverse_prev_bb_cost, float inverse_prev_timing_cost, float *delay_cost_ptr) |
static void | update_t (float *t, float std_dev, float rlim, float success_rat, struct s_annealing_sched annealing_sched) |
static void | update_rlim (float *rlim, float success_rat) |
static int | exit_crit (float t, float cost, struct s_annealing_sched annealing_sched) |
static int | count_connections (void) |
static void | compute_net_pin_index_values (void) |
static double | get_std_dev (int n, double sum_x_squared, double av_x) |
static void | free_fast_cost_update_structs (void) |
static float | recompute_bb_cost (int place_cost_type, int num_regions) |
static float | comp_td_point_to_point_delay (int inet, int ipin) |
static void | update_td_cost (int b_from, int b_to, int num_of_pins) |
static void | comp_delta_td_cost (int b_from, int b_to, int num_of_pins, float *delta_timing, float *delta_delay) |
static void | comp_td_costs (float *timing_cost, float *connection_delay_sum) |
static int | assess_swap (float delta_c, float t) |
static boolean | find_to (int x_from, int y_from, t_type_ptr type, float rlim, int *x_lookup, int *x_to, int *y_to) |
static void | get_non_updateable_bb (int inet, struct s_bb *bb_coord_new) |
static void | update_bb (int inet, struct s_bb *bb_coord_new, struct s_bb *bb_edge_new, int xold, int yold, int xnew, int ynew) |
static int | find_affected_nets (int *nets_to_update, int *net_block_moved, int b_from, int b_to, int num_of_pins) |
static float | get_net_cost (int inet, struct s_bb *bb_ptr) |
static float | nonlinear_cong_cost (int num_regions) |
static void | update_region_occ (int inet, struct s_bb *coords, int add_or_sub, int num_regions) |
static void | save_region_occ (float **old_region_occ_x, float **old_region_occ_y, int num_regions) |
static void | restore_region_occ (float **old_region_occ_x, float **old_region_occ_y, int num_regions) |
static void | get_bb_from_scratch (int inet, struct s_bb *coords, struct s_bb *num_on_edges) |
static double | get_net_wirelength_estimate (int inet, struct s_bb *bbptr) |
void | try_place (struct s_placer_opts placer_opts, struct s_annealing_sched annealing_sched, t_chan_width_dist chan_width_dist, struct s_router_opts router_opts, struct s_det_routing_arch det_routing_arch, t_segment_inf *segment_inf, t_timing_inf timing_inf, t_subblock_data *subblock_data_ptr, t_mst_edge ***mst) |
Variables | |
static int * | duplicate_pins |
static int ** | unique_pin_list |
static float * | net_cost = NULL |
static float * | temp_net_cost = NULL |
static float ** | point_to_point_timing_cost = NULL |
static float ** | temp_point_to_point_timing_cost = NULL |
static float ** | point_to_point_delay_cost = NULL |
static float ** | temp_point_to_point_delay_cost = NULL |
static int ** | net_pin_index = NULL |
static struct s_bb * | bb_coords = NULL |
static struct s_bb * | bb_num_on_edges = NULL |
static struct s_place_region ** | place_region_x |
static struct s_place_region ** | place_region_y |
static float * | place_region_bounds_x |
static float * | place_region_bounds_y |
static float ** | chanx_place_cost_fac |
static float ** | chany_place_cost_fac |
static const float | cross_count [50] |
enum cost_methods |
static void alloc_and_load_for_fast_cost_update | ( | float | place_cost_exp | ) | [static] |
Definition at line 3726 of file place.c.
03727 { 03728 03729 /* Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac * 03730 * arrays with the inverse of the average number of tracks per channel * 03731 * between [subhigh] and [sublow]. This is only useful for the cost * 03732 * function that takes the length of the net bounding box in each * 03733 * dimension divided by the average number of tracks in that direction. * 03734 * For other cost functions, you don't have to bother calling this * 03735 * routine; when using the cost function described above, however, you * 03736 * must always call this routine after you call init_chan and before * 03737 * you do any placement cost determination. The place_cost_exp factor * 03738 * specifies to what power the width of the channel should be taken -- * 03739 * larger numbers make narrower channels more expensive. */ 03740 03741 int low, high, i; 03742 03743 /* Access arrays below as chan?_place_cost_fac[subhigh][sublow]. Since * 03744 * subhigh must be greater than or equal to sublow, we only need to * 03745 * allocate storage for the lower half of a matrix. */ 03746 03747 chanx_place_cost_fac = (float **)my_malloc((ny + 1) * sizeof(float *)); 03748 for(i = 0; i <= ny; i++) 03749 chanx_place_cost_fac[i] = (float *)my_malloc((i + 1) * sizeof(float)); 03750 03751 chany_place_cost_fac = (float **)my_malloc((nx + 1) * sizeof(float *)); 03752 for(i = 0; i <= nx; i++) 03753 chany_place_cost_fac[i] = (float *)my_malloc((i + 1) * sizeof(float)); 03754 03755 03756 /* First compute the number of tracks between channel high and channel * 03757 * low, inclusive, in an efficient manner. */ 03758 03759 chanx_place_cost_fac[0][0] = chan_width_x[0]; 03760 03761 for(high = 1; high <= ny; high++) 03762 { 03763 chanx_place_cost_fac[high][high] = chan_width_x[high]; 03764 for(low = 0; low < high; low++) 03765 { 03766 chanx_place_cost_fac[high][low] = 03767 chanx_place_cost_fac[high - 1][low] + 03768 chan_width_x[high]; 03769 } 03770 } 03771 03772 /* Now compute the inverse of the average number of tracks per channel * 03773 * between high and low. The cost function divides by the average * 03774 * number of tracks per channel, so by storing the inverse I convert * 03775 * this to a faster multiplication. Take this final number to the * 03776 * place_cost_exp power -- numbers other than one mean this is no * 03777 * longer a simple "average number of tracks"; it is some power of * 03778 * that, allowing greater penalization of narrow channels. */ 03779 03780 for(high = 0; high <= ny; high++) 03781 for(low = 0; low <= high; low++) 03782 { 03783 chanx_place_cost_fac[high][low] = (high - low + 1.) / 03784 chanx_place_cost_fac[high][low]; 03785 chanx_place_cost_fac[high][low] = 03786 pow((double)chanx_place_cost_fac[high][low], 03787 (double)place_cost_exp); 03788 } 03789 03790 03791 /* Now do the same thing for the y-directed channels. First get the * 03792 * number of tracks between channel high and channel low, inclusive. */ 03793 03794 chany_place_cost_fac[0][0] = chan_width_y[0]; 03795 03796 for(high = 1; high <= nx; high++) 03797 { 03798 chany_place_cost_fac[high][high] = chan_width_y[high]; 03799 for(low = 0; low < high; low++) 03800 { 03801 chany_place_cost_fac[high][low] = 03802 chany_place_cost_fac[high - 1][low] + 03803 chan_width_y[high]; 03804 } 03805 } 03806 03807 /* Now compute the inverse of the average number of tracks per channel * 03808 * between high and low. Take to specified power. */ 03809 03810 for(high = 0; high <= nx; high++) 03811 for(low = 0; low <= high; low++) 03812 { 03813 chany_place_cost_fac[high][low] = (high - low + 1.) / 03814 chany_place_cost_fac[high][low]; 03815 chany_place_cost_fac[high][low] = 03816 pow((double)chany_place_cost_fac[high][low], 03817 (double)place_cost_exp); 03818 } 03819 }
static void alloc_and_load_placement_structs | ( | int | place_cost_type, | |
int | num_regions, | |||
float | place_cost_exp, | |||
float *** | old_region_occ_x, | |||
float *** | old_region_occ_y, | |||
struct s_placer_opts | placer_opts | |||
) | [static] |
Definition at line 2708 of file place.c.
02714 { 02715 02716 /* Allocates the major structures needed only by the placer, primarily for * 02717 * computing costs quickly and such. */ 02718 02719 int inet, ipin, max_pins_per_fb, i; 02720 02721 max_pins_per_fb = 0; 02722 for(i = 0; i < num_types; i++) 02723 { 02724 max_pins_per_fb = 02725 max(max_pins_per_fb, type_descriptors[i].num_pins); 02726 } 02727 02728 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 02729 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || 02730 placer_opts.enable_timing_computations) 02731 { 02732 /*allocate structures associated with timing driven placement */ 02733 /* [0..num_nets-1][1..num_pins-1] */ 02734 point_to_point_delay_cost = 02735 (float **)my_malloc(num_nets * sizeof(float *)); 02736 temp_point_to_point_delay_cost = 02737 (float **)my_malloc(num_nets * sizeof(float *)); 02738 02739 point_to_point_timing_cost = 02740 (float **)my_malloc(num_nets * sizeof(float *)); 02741 temp_point_to_point_timing_cost = 02742 (float **)my_malloc(num_nets * sizeof(float *)); 02743 02744 net_pin_index = 02745 (int **)alloc_matrix(0, num_blocks - 1, 0, 02746 max_pins_per_fb - 1, sizeof(int)); 02747 02748 for(inet = 0; inet < num_nets; inet++) 02749 { 02750 02751 /* in the following, subract one so index starts at * 02752 * 1 instead of 0 */ 02753 point_to_point_delay_cost[inet] = 02754 (float *)my_malloc(net[inet].num_sinks * 02755 sizeof(float)); 02756 point_to_point_delay_cost[inet]--; 02757 02758 temp_point_to_point_delay_cost[inet] = 02759 (float *)my_malloc(net[inet].num_sinks * 02760 sizeof(float)); 02761 temp_point_to_point_delay_cost[inet]--; 02762 02763 point_to_point_timing_cost[inet] = 02764 (float *)my_malloc(net[inet].num_sinks * 02765 sizeof(float)); 02766 point_to_point_timing_cost[inet]--; 02767 02768 temp_point_to_point_timing_cost[inet] = 02769 (float *)my_malloc(net[inet].num_sinks * 02770 sizeof(float)); 02771 temp_point_to_point_timing_cost[inet]--; 02772 } 02773 for(inet = 0; inet < num_nets; inet++) 02774 { 02775 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++) 02776 { 02777 point_to_point_delay_cost[inet][ipin] = 0; 02778 temp_point_to_point_delay_cost[inet][ipin] = 0; 02779 } 02780 } 02781 } 02782 02783 02784 02785 02786 02787 net_cost = (float *)my_malloc(num_nets * sizeof(float)); 02788 temp_net_cost = (float *)my_malloc(num_nets * sizeof(float)); 02789 02790 /* Used to store costs for moves not yet made and to indicate when a net's * 02791 * cost has been recomputed. temp_net_cost[inet] < 0 means net's cost hasn't * 02792 * been recomputed. */ 02793 02794 for(inet = 0; inet < num_nets; inet++) 02795 temp_net_cost[inet] = -1.; 02796 02797 bb_coords = (struct s_bb *)my_malloc(num_nets * sizeof(struct s_bb)); 02798 bb_num_on_edges = 02799 (struct s_bb *)my_malloc(num_nets * sizeof(struct s_bb)); 02800 02801 /* Get a list of pins with no duplicates. */ 02802 02803 alloc_and_load_unique_pin_list(); 02804 02805 /* Allocate storage for subregion data, if needed. */ 02806 02807 if(place_cost_type == NONLINEAR_CONG) 02808 { 02809 alloc_place_regions(num_regions); 02810 load_place_regions(num_regions); 02811 *old_region_occ_x = (float **)alloc_matrix(0, num_regions - 1, 0, 02812 num_regions - 1, 02813 sizeof(float)); 02814 *old_region_occ_y = 02815 (float **)alloc_matrix(0, num_regions - 1, 0, num_regions - 1, 02816 sizeof(float)); 02817 } 02818 else 02819 { /* Shouldn't use them; crash hard if I do! */ 02820 *old_region_occ_x = NULL; 02821 *old_region_occ_y = NULL; 02822 } 02823 02824 if(place_cost_type == LINEAR_CONG) 02825 { 02826 alloc_and_load_for_fast_cost_update(place_cost_exp); 02827 } 02828 }
static void alloc_and_load_unique_pin_list | ( | void | ) | [static] |
Definition at line 3003 of file place.c.
03004 { 03005 03006 /* This routine looks for multiple pins going to the same block in the * 03007 * pinlist of each net. If it finds any, it marks that net as having * 03008 * duplicate pins, and creates a new pinlist with no duplicates. This * 03009 * is then used by the updatable bounding box calculation routine for * 03010 * efficiency. */ 03011 03012 int inet, ipin, bnum, num_dup, any_dups, offset; 03013 int *times_listed; /* [0..num_blocks-1]: number of times a block is * 03014 * * listed in the pinlist of a net. Temp. storage. */ 03015 03016 duplicate_pins = my_calloc(num_nets, sizeof(int)); 03017 times_listed = my_calloc(num_blocks, sizeof(int)); 03018 any_dups = 0; 03019 03020 for(inet = 0; inet < num_nets; inet++) 03021 { 03022 03023 num_dup = 0; 03024 03025 for(ipin = 0; ipin <= net[inet].num_sinks; ipin++) 03026 { 03027 bnum = net[inet].node_block[ipin]; 03028 times_listed[bnum]++; 03029 if(times_listed[bnum] > 1) 03030 num_dup++; 03031 } 03032 03033 if(num_dup > 0) 03034 { /* Duplicates found. Make unique pin list. */ 03035 duplicate_pins[inet] = num_dup; 03036 03037 if(any_dups == 0) 03038 { /* This is the first duplicate found */ 03039 unique_pin_list = 03040 (int **)my_calloc(num_nets, sizeof(int *)); 03041 any_dups = 1; 03042 } 03043 03044 unique_pin_list[inet] = 03045 my_malloc((net[inet].num_sinks + 1 - 03046 num_dup) * sizeof(int)); 03047 03048 offset = 0; 03049 for(ipin = 0; ipin <= net[inet].num_sinks; ipin++) 03050 { 03051 bnum = net[inet].node_block[ipin]; 03052 if(times_listed[bnum] != 0) 03053 { 03054 times_listed[bnum] = 0; 03055 unique_pin_list[inet][offset] = bnum; 03056 offset++; 03057 } 03058 } 03059 } 03060 03061 else 03062 { /* No duplicates found. Reset times_listed. */ 03063 for(ipin = 0; ipin <= net[inet].num_sinks; ipin++) 03064 { 03065 bnum = net[inet].node_block[ipin]; 03066 times_listed[bnum] = 0; 03067 } 03068 } 03069 } 03070 03071 free((void *)times_listed); 03072 }
static void alloc_place_regions | ( | int | num_regions | ) | [static] |
Definition at line 2832 of file place.c.
02833 { 02834 02835 /* Allocates memory for the regional occupancy, cost, etc. counts * 02836 * kept when we're using the NONLINEAR_CONG placement cost * 02837 * function. */ 02838 02839 place_region_x = 02840 (struct s_place_region **)alloc_matrix(0, num_regions - 1, 0, 02841 num_regions - 1, 02842 sizeof(struct s_place_region)); 02843 02844 place_region_y = 02845 (struct s_place_region **)alloc_matrix(0, num_regions - 1, 0, 02846 num_regions - 1, 02847 sizeof(struct s_place_region)); 02848 02849 place_region_bounds_x = (float *)my_malloc((num_regions + 1) * 02850 sizeof(float)); 02851 02852 place_region_bounds_y = (float *)my_malloc((num_regions + 1) * 02853 sizeof(float)); 02854 }
static int assess_swap | ( | float | delta_c, | |
float | t | |||
) | [static] |
Definition at line 1870 of file place.c.
01872 { 01873 01874 /* Returns: 1 -> move accepted, 0 -> rejected. */ 01875 01876 int accept; 01877 float prob_fac, fnum; 01878 01879 if(delta_c <= 0) 01880 { 01881 01882 #ifdef SPEC /* Reduce variation in final solution due to round off */ 01883 fnum = my_frand(); 01884 #endif 01885 01886 accept = 1; 01887 return (accept); 01888 } 01889 01890 if(t == 0.) 01891 return (0); 01892 01893 fnum = my_frand(); 01894 prob_fac = exp(-delta_c / t); 01895 if(prob_fac > fnum) 01896 { 01897 accept = 1; 01898 } 01899 else 01900 { 01901 accept = 0; 01902 } 01903 return (accept); 01904 }
static void check_place | ( | float | bb_cost, | |
float | timing_cost, | |||
int | place_cost_type, | |||
int | num_regions, | |||
enum e_place_algorithm | place_algorithm, | |||
float | delay_cost | |||
) | [static] |
Definition at line 3823 of file place.c.
03829 { 03830 03831 /* Checks that the placement has not confused our data structures. * 03832 * i.e. the clb and block structures agree about the locations of * 03833 * every block, blocks are in legal spots, etc. Also recomputes * 03834 * the final placement cost from scratch and makes sure it is * 03835 * within roundoff of what we think the cost is. */ 03836 03837 static int *bdone; 03838 int i, j, k, error = 0, bnum; 03839 float bb_cost_check; 03840 int usage_check; 03841 float timing_cost_check, delay_cost_check; 03842 03843 bb_cost_check = comp_bb_cost(CHECK, place_cost_type, num_regions); 03844 printf("bb_cost recomputed from scratch is %g.\n", bb_cost_check); 03845 if(fabs(bb_cost_check - bb_cost) > bb_cost * ERROR_TOL) 03846 { 03847 printf 03848 ("Error: bb_cost_check: %g and bb_cost: %g differ in check_place.\n", 03849 bb_cost_check, bb_cost); 03850 error++; 03851 } 03852 03853 if(place_algorithm == NET_TIMING_DRIVEN_PLACE || 03854 place_algorithm == PATH_TIMING_DRIVEN_PLACE) 03855 { 03856 comp_td_costs(&timing_cost_check, &delay_cost_check); 03857 printf("timing_cost recomputed from scratch is %g. \n", 03858 timing_cost_check); 03859 if(fabs(timing_cost_check - timing_cost) > 03860 timing_cost * ERROR_TOL) 03861 { 03862 printf("Error: timing_cost_check: %g and timing_cost: " 03863 "%g differ in check_place.\n", 03864 timing_cost_check, timing_cost); 03865 error++; 03866 } 03867 printf("delay_cost recomputed from scratch is %g. \n", 03868 delay_cost_check); 03869 if(fabs(delay_cost_check - delay_cost) > delay_cost * ERROR_TOL) 03870 { 03871 printf("Error: delay_cost_check: %g and delay_cost: " 03872 "%g differ in check_place.\n", 03873 delay_cost_check, delay_cost); 03874 error++; 03875 } 03876 } 03877 03878 bdone = (int *)my_malloc(num_blocks * sizeof(int)); 03879 for(i = 0; i < num_blocks; i++) 03880 bdone[i] = 0; 03881 03882 /* Step through grid array. Check it against block array. */ 03883 for(i = 0; i <= (nx + 1); i++) 03884 for(j = 0; j <= (ny + 1); j++) 03885 { 03886 if(grid[i][j].usage > grid[i][j].type->capacity) 03887 { 03888 printf 03889 ("Error: block at grid location (%d,%d) overused. " 03890 "Usage is %d\n", i, j, grid[i][j].usage); 03891 error++; 03892 } 03893 usage_check = 0; 03894 for(k = 0; k < grid[i][j].type->capacity; k++) 03895 { 03896 bnum = grid[i][j].blocks[k]; 03897 if(EMPTY == bnum) 03898 continue; 03899 03900 if(block[bnum].type != grid[i][j].type) 03901 { 03902 printf 03903 ("Error: block %d type does not match grid location (%d,%d) type.\n", 03904 bnum, i, j); 03905 error++; 03906 } 03907 if((block[bnum].x != i) || (block[bnum].y != j)) 03908 { 03909 printf 03910 ("Error: block %d location conflicts with grid(%d,%d)" 03911 "data.\n", bnum, i, j); 03912 error++; 03913 } 03914 ++usage_check; 03915 bdone[bnum]++; 03916 } 03917 if(usage_check != grid[i][j].usage) 03918 { 03919 printf 03920 ("Error: Location (%d,%d) usage is %d, but has actual usage %d.\n", 03921 i, j, grid[i][j].usage, usage_check); 03922 } 03923 } 03924 03925 /* Check that every block exists in the grid and block arrays somewhere. */ 03926 for(i = 0; i < num_blocks; i++) 03927 if(bdone[i] != 1) 03928 { 03929 printf 03930 ("Error: block %d listed %d times in data structures.\n", 03931 i, bdone[i]); 03932 error++; 03933 } 03934 free(bdone); 03935 03936 if(error == 0) 03937 { 03938 printf 03939 ("\nCompleted placement consistency check successfully.\n\n"); 03940 #ifdef PRINT_REL_POS_DISTR 03941 print_relative_pos_distr(); 03942 #endif 03943 } 03944 else 03945 { 03946 printf 03947 ("\nCompleted placement consistency check, %d Errors found.\n\n", 03948 error); 03949 printf("Aborting program.\n"); 03950 exit(1); 03951 } 03952 }
static float comp_bb_cost | ( | int | method, | |
int | place_cost_type, | |||
int | num_regions | |||
) | [static] |
Definition at line 2363 of file place.c.
02366 { 02367 02368 /* Finds the cost from scratch. Done only when the placement * 02369 * has been radically changed (i.e. after initial placement). * 02370 * Otherwise find the cost change incrementally. If method * 02371 * check is NORMAL, we find bounding boxes that are updateable * 02372 * for the larger nets. If method is CHECK, all bounding boxes * 02373 * are found via the non_updateable_bb routine, to provide a * 02374 * cost which can be used to check the correctness of the * 02375 * other routine. */ 02376 02377 int i, j, k; 02378 float cost; 02379 double expected_wirelength; 02380 02381 cost = 0; 02382 expected_wirelength = 0.0; 02383 02384 /* Initialize occupancies to zero if regions are being used. */ 02385 02386 if(place_cost_type == NONLINEAR_CONG) 02387 { 02388 for(i = 0; i < num_regions; i++) 02389 { 02390 for(j = 0; j < num_regions; j++) 02391 { 02392 place_region_x[i][j].occupancy = 0.; 02393 place_region_y[i][j].occupancy = 0.; 02394 } 02395 } 02396 } 02397 02398 for(k = 0; k < num_nets; k++) 02399 { /* for each net ... */ 02400 02401 if(net[k].is_global == FALSE) 02402 { /* Do only if not global. */ 02403 02404 /* Small nets don't use incremental updating on their bounding boxes, * 02405 * so they can use a fast bounding box calculator. */ 02406 02407 if(net[k].num_sinks >= SMALL_NET && method == NORMAL) 02408 { 02409 get_bb_from_scratch(k, &bb_coords[k], 02410 &bb_num_on_edges[k]); 02411 } 02412 else 02413 { 02414 get_non_updateable_bb(k, &bb_coords[k]); 02415 } 02416 02417 if(place_cost_type != NONLINEAR_CONG) 02418 { 02419 net_cost[k] = get_net_cost(k, &bb_coords[k]); 02420 cost += net_cost[k]; 02421 if(method == CHECK) 02422 expected_wirelength += 02423 get_net_wirelength_estimate(k, 02424 &bb_coords 02425 [k]); 02426 } 02427 else 02428 { /* Must be nonlinear_cong case. */ 02429 update_region_occ(k, &bb_coords[k], 1, 02430 num_regions); 02431 } 02432 } 02433 } 02434 02435 if(place_cost_type == NONLINEAR_CONG) 02436 { 02437 cost = nonlinear_cong_cost(num_regions); 02438 } 02439 02440 if(method == CHECK) 02441 printf("BB estimate of min-dist (placement) wirelength is ;%.0f\n", 02442 expected_wirelength); 02443 02444 return (cost); 02445 }
static void comp_delta_td_cost | ( | int | b_from, | |
int | b_to, | |||
int | num_of_pins, | |||
float * | delta_timing, | |||
float * | delta_delay | |||
) | [static] |
Definition at line 2149 of file place.c.
02154 { 02155 02156 /*a net that is being driven by a moved block must have all of its */ 02157 /*sink timing costs recomputed. A net that is driving a moved block */ 02158 /*must only have the timing cost on the connection driving the input */ 02159 /*pin computed */ 02160 02161 int inet, k, net_pin, ipin; 02162 float delta_timing_cost, delta_delay_cost, temp_delay; 02163 02164 delta_timing_cost = 0.; 02165 delta_delay_cost = 0.; 02166 02167 02168 for(k = 0; k < num_of_pins; k++) 02169 { 02170 inet = block[b_from].nets[k]; 02171 02172 if(inet == OPEN) 02173 continue; 02174 02175 if(net[inet].is_global) 02176 continue; 02177 02178 net_pin = net_pin_index[b_from][k]; 02179 02180 if(net_pin != 0) 02181 { /*this net is driving a moved block */ 02182 02183 /*if this net is being driven by a block that has moved, we do not */ 02184 /*need to compute the change in the timing cost (here) since it will */ 02185 /*be computed in the fanout of the net on the driving block, also */ 02186 /*computing it here would double count the change, and mess up the */ 02187 /*delta_timing_cost value */ 02188 if(net[inet].node_block[0] != b_to 02189 && net[inet].node_block[0] != b_from) 02190 { 02191 temp_delay = 02192 comp_td_point_to_point_delay(inet, net_pin); 02193 02194 temp_point_to_point_delay_cost[inet][net_pin] = 02195 temp_delay; 02196 temp_point_to_point_timing_cost[inet][net_pin] = 02197 timing_place_crit[inet][net_pin] * temp_delay; 02198 02199 delta_delay_cost += 02200 temp_point_to_point_delay_cost[inet][net_pin] 02201 - point_to_point_delay_cost[inet][net_pin]; 02202 02203 delta_timing_cost += 02204 temp_point_to_point_timing_cost[inet][net_pin] 02205 - point_to_point_timing_cost[inet][net_pin]; 02206 } 02207 } 02208 else 02209 { /*this net is being driven by a moved block, recompute */ 02210 /*all point to point connections on this net. */ 02211 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++) 02212 { 02213 temp_delay = 02214 comp_td_point_to_point_delay(inet, ipin); 02215 02216 temp_point_to_point_delay_cost[inet][ipin] = 02217 temp_delay; 02218 temp_point_to_point_timing_cost[inet][ipin] = 02219 timing_place_crit[inet][ipin] * temp_delay; 02220 02221 delta_delay_cost += 02222 temp_point_to_point_delay_cost[inet][ipin] - 02223 point_to_point_delay_cost[inet][ipin]; 02224 02225 delta_timing_cost += 02226 temp_point_to_point_timing_cost[inet][ipin] - 02227 point_to_point_timing_cost[inet][ipin]; 02228 } 02229 } 02230 } 02231 02232 if(b_to != EMPTY) 02233 { 02234 for(k = 0; k < num_of_pins; k++) 02235 { 02236 inet = block[b_to].nets[k]; 02237 02238 if(inet == OPEN) 02239 continue; 02240 02241 if(net[inet].is_global) 02242 continue; 02243 02244 net_pin = net_pin_index[b_to][k]; 02245 02246 if(net_pin != 0) 02247 { /*this net is driving a moved block */ 02248 02249 /*if this net is being driven by a block that has moved, we do not */ 02250 /*need to compute the change in the timing cost (here) since it was */ 02251 /*computed in the fanout of the net on the driving block, also */ 02252 /*computing it here would double count the change, and mess up the */ 02253 /*delta_timing_cost value */ 02254 if(net[inet].node_block[0] != b_to 02255 && net[inet].node_block[0] != b_from) 02256 { 02257 temp_delay = 02258 comp_td_point_to_point_delay(inet, 02259 net_pin); 02260 02261 temp_point_to_point_delay_cost[inet] 02262 [net_pin] = temp_delay; 02263 temp_point_to_point_timing_cost[inet] 02264 [net_pin] = 02265 timing_place_crit[inet][net_pin] * 02266 temp_delay; 02267 02268 delta_delay_cost += 02269 temp_point_to_point_delay_cost[inet] 02270 [net_pin] - 02271 point_to_point_delay_cost[inet] 02272 [net_pin]; 02273 delta_timing_cost += 02274 temp_point_to_point_timing_cost[inet] 02275 [net_pin] - 02276 point_to_point_timing_cost[inet] 02277 [net_pin]; 02278 } 02279 } 02280 else 02281 { /*this net is being driven by a moved block, recompute */ 02282 /*all point to point connections on this net. */ 02283 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++) 02284 { 02285 02286 temp_delay = 02287 comp_td_point_to_point_delay(inet, 02288 ipin); 02289 02290 temp_point_to_point_delay_cost[inet][ipin] 02291 = temp_delay; 02292 temp_point_to_point_timing_cost[inet] 02293 [ipin] = 02294 timing_place_crit[inet][ipin] * 02295 temp_delay; 02296 02297 02298 delta_delay_cost += 02299 temp_point_to_point_delay_cost[inet] 02300 [ipin] - 02301 point_to_point_delay_cost[inet][ipin]; 02302 delta_timing_cost += 02303 temp_point_to_point_timing_cost[inet] 02304 [ipin] - 02305 point_to_point_timing_cost[inet] 02306 [ipin]; 02307 } 02308 } 02309 } 02310 } 02311 02312 *delta_timing = delta_timing_cost; 02313 *delta_delay = delta_delay_cost; 02314 }
static void comp_td_costs | ( | float * | timing_cost, | |
float * | connection_delay_sum | |||
) | [static] |
Definition at line 2317 of file place.c.
02319 { 02320 /*computes the cost (from scratch) due to the delays and criticalities* 02321 *on all point to point connections, we define the timing cost of * 02322 *each connection as criticality*delay */ 02323 02324 int inet, ipin; 02325 float loc_timing_cost, loc_connection_delay_sum, temp_delay_cost, 02326 temp_timing_cost; 02327 02328 loc_timing_cost = 0.; 02329 loc_connection_delay_sum = 0.; 02330 02331 for(inet = 0; inet < num_nets; inet++) 02332 { /* for each net ... */ 02333 if(net[inet].is_global == FALSE) 02334 { /* Do only if not global. */ 02335 02336 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++) 02337 { 02338 02339 temp_delay_cost = 02340 comp_td_point_to_point_delay(inet, ipin); 02341 temp_timing_cost = 02342 temp_delay_cost * 02343 timing_place_crit[inet][ipin]; 02344 02345 loc_connection_delay_sum += temp_delay_cost; 02346 point_to_point_delay_cost[inet][ipin] = 02347 temp_delay_cost; 02348 temp_point_to_point_delay_cost[inet][ipin] = -1; /*undefined */ 02349 02350 point_to_point_timing_cost[inet][ipin] = 02351 temp_timing_cost; 02352 temp_point_to_point_timing_cost[inet][ipin] = -1; /*undefined */ 02353 loc_timing_cost += temp_timing_cost; 02354 } 02355 } 02356 } 02357 *timing_cost = loc_timing_cost; 02358 *connection_delay_sum = loc_connection_delay_sum; 02359 }
static float comp_td_point_to_point_delay | ( | int | inet, | |
int | ipin | |||
) | [static] |
Definition at line 1965 of file place.c.
01967 { 01968 01969 /*returns the delay of one point to point connection */ 01970 01971 int source_block, sink_block; 01972 int delta_x, delta_y; 01973 t_type_ptr source_type, sink_type; 01974 float delay_source_to_sink; 01975 01976 delay_source_to_sink = 0.; 01977 01978 source_block = net[inet].node_block[0]; 01979 source_type = block[source_block].type; 01980 01981 sink_block = net[inet].node_block[ipin]; 01982 sink_type = block[sink_block].type; 01983 01984 assert(source_type != NULL); 01985 assert(sink_type != NULL); 01986 01987 delta_x = abs(block[sink_block].x - block[source_block].x); 01988 delta_y = abs(block[sink_block].y - block[source_block].y); 01989 01990 /* TODO low priority: Could be merged into one look-up table */ 01991 /* Note: This heuristic is terrible on Quality of Results. 01992 * A much better heuristic is to create a more comprehensive lookup table but 01993 * it's too late in the release cycle to do this. Pushing until the next release */ 01994 if(source_type == IO_TYPE) 01995 { 01996 if(sink_type == IO_TYPE) 01997 delay_source_to_sink = delta_io_to_io[delta_x][delta_y]; 01998 else 01999 delay_source_to_sink = delta_io_to_fb[delta_x][delta_y]; 02000 } 02001 else 02002 { 02003 if(sink_type == IO_TYPE) 02004 delay_source_to_sink = delta_fb_to_io[delta_x][delta_y]; 02005 else 02006 delay_source_to_sink = delta_fb_to_fb[delta_x][delta_y]; 02007 } 02008 if(delay_source_to_sink < 0) 02009 { 02010 printf 02011 ("Error in comp_td_point_to_point_delay in place.c, bad delay_source_to_sink value\n"); 02012 exit(1); 02013 } 02014 02015 if(delay_source_to_sink < 0.) 02016 { 02017 printf 02018 ("Error in comp_td_point_to_point_delay in place.c, delay is less than 0\n"); 02019 exit(1); 02020 } 02021 02022 return (delay_source_to_sink); 02023 }
static void compute_net_pin_index_values | ( | void | ) | [static] |
Definition at line 996 of file place.c.
00997 { 00998 /*computes net_pin_index array, this array allows us to quickly */ 00999 /*find what pin on the net a block pin corresponds to */ 01000 01001 int inet, netpin, blk, iblk, ipin; 01002 t_type_ptr type; 01003 01004 /*initialize values to OPEN */ 01005 for(iblk = 0; iblk < num_blocks; iblk++) 01006 { 01007 type = block[iblk].type; 01008 for(ipin = 0; ipin < type->num_pins; ipin++) 01009 { 01010 net_pin_index[iblk][ipin] = OPEN; 01011 } 01012 } 01013 01014 for(inet = 0; inet < num_nets; inet++) 01015 { 01016 01017 if(net[inet].is_global) 01018 continue; 01019 01020 for(netpin = 0; netpin <= net[inet].num_sinks; netpin++) 01021 { 01022 blk = net[inet].node_block[netpin]; 01023 net_pin_index[blk][net[inet].node_block_pin[netpin]] = 01024 netpin; 01025 } 01026 } 01027 }
static int count_connections | ( | void | ) | [static] |
Definition at line 976 of file place.c.
00977 { 00978 /*only count non-global connections */ 00979 00980 int count, inet; 00981 00982 count = 0; 00983 00984 for(inet = 0; inet < num_nets; inet++) 00985 { 00986 00987 if(net[inet].is_global) 00988 continue; 00989 00990 count += net[inet].num_sinks; 00991 } 00992 return (count); 00993 }
static int exit_crit | ( | float | t, | |
float | cost, | |||
struct s_annealing_sched | annealing_sched | |||
) | [static] |
Definition at line 1131 of file place.c.
01134 { 01135 01136 /* Return 1 when the exit criterion is met. */ 01137 01138 if(annealing_sched.type == USER_SCHED) 01139 { 01140 if(t < annealing_sched.exit_t) 01141 { 01142 return (1); 01143 } 01144 else 01145 { 01146 return (0); 01147 } 01148 } 01149 01150 /* Automatic annealing schedule */ 01151 01152 if(t < 0.005 * cost / num_nets) 01153 { 01154 return (1); 01155 } 01156 else 01157 { 01158 return (0); 01159 } 01160 }
static int find_affected_nets | ( | int * | nets_to_update, | |
int * | net_block_moved, | |||
int | b_from, | |||
int | b_to, | |||
int | num_of_pins | |||
) | [static] |
Definition at line 1586 of file place.c.
01591 { 01592 01593 /* Puts a list of all the nets connected to b_from and b_to into * 01594 * nets_to_update. Returns the number of affected nets. Net_block_moved * 01595 * is either FROM, TO or FROM_AND_TO -- the block connected to this net * 01596 * that has moved. */ 01597 01598 int k, inet, affected_index, count; 01599 01600 affected_index = 0; 01601 01602 for(k = 0; k < num_of_pins; k++) 01603 { 01604 inet = block[b_from].nets[k]; 01605 01606 if(inet == OPEN) 01607 continue; 01608 01609 if(net[inet].is_global) 01610 continue; 01611 01612 /* This is here in case the same block connects to a net twice. */ 01613 01614 if(temp_net_cost[inet] > 0.) 01615 continue; 01616 01617 nets_to_update[affected_index] = inet; 01618 net_block_moved[affected_index] = FROM; 01619 affected_index++; 01620 temp_net_cost[inet] = 1.; /* Flag to say we've marked this net. */ 01621 } 01622 01623 if(b_to != EMPTY) 01624 { 01625 for(k = 0; k < num_of_pins; k++) 01626 { 01627 inet = block[b_to].nets[k]; 01628 01629 if(inet == OPEN) 01630 continue; 01631 01632 if(net[inet].is_global) 01633 continue; 01634 01635 if(temp_net_cost[inet] > 0.) 01636 { /* Net already marked. */ 01637 for(count = 0; count < affected_index; count++) 01638 { 01639 if(nets_to_update[count] == inet) 01640 { 01641 if(net_block_moved[count] == FROM) 01642 net_block_moved[count] = 01643 FROM_AND_TO; 01644 break; 01645 } 01646 } 01647 01648 #ifdef DEBUG 01649 if(count > affected_index) 01650 { 01651 printf 01652 ("Error in find_affected_nets -- count = %d," 01653 " affected index = %d.\n", count, 01654 affected_index); 01655 exit(1); 01656 } 01657 #endif 01658 } 01659 01660 else 01661 { /* Net not marked yet. */ 01662 01663 nets_to_update[affected_index] = inet; 01664 net_block_moved[affected_index] = TO; 01665 affected_index++; 01666 temp_net_cost[inet] = 1.; /* Flag means we've marked net. */ 01667 } 01668 } 01669 } 01670 01671 return (affected_index); 01672 }
static boolean find_to | ( | int | x_from, | |
int | y_from, | |||
t_type_ptr | type, | |||
float | rlim, | |||
int * | x_lookup, | |||
int * | x_to, | |||
int * | y_to | |||
) | [static] |
Definition at line 1676 of file place.c.
01683 { 01684 01685 /* Returns the point to which I want to swap, properly range limited. 01686 * rlim must always be between 1 and nx (inclusive) for this routine 01687 * to work. Assumes that a column only contains blocks of the same type. 01688 */ 01689 01690 int x_rel, y_rel, iside, iplace, rlx, rly, min_x, max_x, min_y, max_y; 01691 int num_col_same_type, i, j; 01692 01693 rlx = min(nx, rlim); /* Only needed when nx < ny. */ 01694 rly = min(ny, rlim); /* Added rly for aspect_ratio != 1 case. */ 01695 01696 min_x = max(1, x_from - rlx); 01697 max_x = min(nx, x_from + rlx); 01698 min_y = max(1, y_from - rly); 01699 max_y = min(ny, y_from + rly); 01700 01701 num_col_same_type = 0; 01702 j = 0; 01703 if(type != IO_TYPE) 01704 { 01705 for(i = min_x; i <= max_x; i++) 01706 { 01707 if(grid[i][1].type == type) 01708 { 01709 num_col_same_type++; 01710 x_lookup[j] = i; 01711 j++; 01712 } 01713 } 01714 assert(num_col_same_type != 0); 01715 if(num_col_same_type == 1 && 01716 ((((max_y - min_y) / type->height) - 1) <= 0 01717 || type->height > (ny / 2))) 01718 return FALSE; 01719 } 01720 01721 #ifdef DEBUG 01722 if(rlx < 1 || rlx > nx) 01723 { 01724 printf("Error in find_to: rlx = %d\n", rlx); 01725 exit(1); 01726 } 01727 #endif 01728 01729 do 01730 { /* Until (x_to, y_to) different from (x_from, y_from) */ 01731 if(type == IO_TYPE) 01732 { /* io_block to be moved. */ 01733 if(rlx >= nx) 01734 { 01735 iside = my_irand(3); 01736 /* * 01737 * +-----1----+ * 01738 * | | * 01739 * | | * 01740 * 0 2 * 01741 * | | * 01742 * | | * 01743 * +-----3----+ * 01744 * */ 01745 switch (iside) 01746 { 01747 case 0: 01748 iplace = my_irand(ny - 1) + 1; 01749 *x_to = 0; 01750 *y_to = iplace; 01751 break; 01752 case 1: 01753 iplace = my_irand(nx - 1) + 1; 01754 *x_to = iplace; 01755 *y_to = ny + 1; 01756 break; 01757 case 2: 01758 iplace = my_irand(ny - 1) + 1; 01759 *x_to = nx + 1; 01760 *y_to = iplace; 01761 break; 01762 case 3: 01763 iplace = my_irand(nx - 1) + 1; 01764 *x_to = iplace; 01765 *y_to = 0; 01766 break; 01767 default: 01768 printf 01769 ("Error in find_to. Unexpected io swap location.\n"); 01770 exit(1); 01771 } 01772 } 01773 else 01774 { /* rlx is less than whole chip */ 01775 if(x_from == 0) 01776 { 01777 iplace = my_irand(2 * rly); 01778 *y_to = y_from - rly + iplace; 01779 *x_to = x_from; 01780 if(*y_to > ny) 01781 { 01782 *y_to = ny + 1; 01783 *x_to = my_irand(rlx - 1) + 1; 01784 } 01785 else if(*y_to < 1) 01786 { 01787 *y_to = 0; 01788 *x_to = my_irand(rlx - 1) + 1; 01789 } 01790 } 01791 else if(x_from == nx + 1) 01792 { 01793 iplace = my_irand(2 * rly); 01794 *y_to = y_from - rly + iplace; 01795 *x_to = x_from; 01796 if(*y_to > ny) 01797 { 01798 *y_to = ny + 1; 01799 *x_to = nx - my_irand(rlx - 1); 01800 } 01801 else if(*y_to < 1) 01802 { 01803 *y_to = 0; 01804 *x_to = nx - my_irand(rlx - 1); 01805 } 01806 } 01807 else if(y_from == 0) 01808 { 01809 iplace = my_irand(2 * rlx); 01810 *x_to = x_from - rlx + iplace; 01811 *y_to = y_from; 01812 if(*x_to > nx) 01813 { 01814 *x_to = nx + 1; 01815 *y_to = my_irand(rly - 1) + 1; 01816 } 01817 else if(*x_to < 1) 01818 { 01819 *x_to = 0; 01820 *y_to = my_irand(rly - 1) + 1; 01821 } 01822 } 01823 else 01824 { /* *y_from == ny + 1 */ 01825 iplace = my_irand(2 * rlx); 01826 *x_to = x_from - rlx + iplace; 01827 *y_to = y_from; 01828 if(*x_to > nx) 01829 { 01830 *x_to = nx + 1; 01831 *y_to = ny - my_irand(rly - 1); 01832 } 01833 else if(*x_to < 1) 01834 { 01835 *x_to = 0; 01836 *y_to = ny - my_irand(rly - 1); 01837 } 01838 } 01839 } /* End rlx if */ 01840 } /* end type if */ 01841 else 01842 { 01843 x_rel = my_irand(num_col_same_type - 1); 01844 y_rel = 01845 my_irand(max 01846 (0, ((max_y - min_y) / type->height) - 1)); 01847 *x_to = x_lookup[x_rel]; 01848 *y_to = min_y + y_rel * type->height; 01849 *y_to = (*y_to) - grid[*x_to][*y_to].offset; /* align it */ 01850 assert(*x_to >= 1 && *x_to <= nx); 01851 assert(*y_to >= 1 && *y_to <= ny); 01852 } 01853 } 01854 while((x_from == *x_to) && (y_from == *y_to)); 01855 01856 #ifdef DEBUG 01857 if(*x_to < 0 || *x_to > nx + 1 || *y_to < 0 || *y_to > ny + 1) 01858 { 01859 printf("Error in routine find_to: (x_to,y_to) = (%d,%d)\n", 01860 *x_to, *y_to); 01861 exit(1); 01862 } 01863 #endif 01864 assert(type == grid[*x_to][*y_to].type); 01865 return TRUE; 01866 }
static void free_fast_cost_update_structs | ( | void | ) | [static] |
Definition at line 3705 of file place.c.
03706 { 03707 03708 /* Frees the structures used to speed up evaluation of the nonlinear * 03709 * congestion cost function. */ 03710 03711 int i; 03712 03713 for(i = 0; i <= ny; i++) 03714 free(chanx_place_cost_fac[i]); 03715 03716 free(chanx_place_cost_fac); 03717 03718 for(i = 0; i <= nx; i++) 03719 free(chany_place_cost_fac[i]); 03720 03721 free(chany_place_cost_fac); 03722 }
static void free_place_regions | ( | int | num_regions | ) | [static] |
Definition at line 2619 of file place.c.
02620 { 02621 02622 /* Frees the place_regions data structures needed by the NONLINEAR_CONG * 02623 * cost function. */ 02624 02625 free_matrix(place_region_x, 0, num_regions - 1, 0, sizeof(struct 02626 s_place_region)); 02627 02628 free_matrix(place_region_y, 0, num_regions - 1, 0, sizeof(struct 02629 s_place_region)); 02630 02631 free(place_region_bounds_x); 02632 free(place_region_bounds_y); 02633 }
static void free_placement_structs | ( | int | place_cost_type, | |
int | num_regions, | |||
float ** | old_region_occ_x, | |||
float ** | old_region_occ_y, | |||
struct s_placer_opts | placer_opts | |||
) | [static] |
Definition at line 2637 of file place.c.
02642 { 02643 02644 /* Frees the major structures needed by the placer (and not needed * 02645 * elsewhere). */ 02646 02647 int inet; 02648 02649 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 02650 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || 02651 placer_opts.enable_timing_computations) 02652 { 02653 for(inet = 0; inet < num_nets; inet++) 02654 { 02655 /*add one to the address since it is indexed from 1 not 0 */ 02656 02657 point_to_point_delay_cost[inet]++; 02658 free(point_to_point_delay_cost[inet]); 02659 02660 point_to_point_timing_cost[inet]++; 02661 free(point_to_point_timing_cost[inet]); 02662 02663 temp_point_to_point_delay_cost[inet]++; 02664 free(temp_point_to_point_delay_cost[inet]); 02665 02666 temp_point_to_point_timing_cost[inet]++; 02667 free(temp_point_to_point_timing_cost[inet]); 02668 } 02669 free(point_to_point_delay_cost); 02670 free(temp_point_to_point_delay_cost); 02671 02672 free(point_to_point_timing_cost); 02673 free(temp_point_to_point_timing_cost); 02674 02675 free_matrix(net_pin_index, 0, num_blocks - 1, 0, sizeof(int)); 02676 } 02677 02678 02679 free(net_cost); 02680 free(temp_net_cost); 02681 free(bb_num_on_edges); 02682 free(bb_coords); 02683 02684 net_cost = NULL; /* Defensive coding. */ 02685 temp_net_cost = NULL; 02686 bb_num_on_edges = NULL; 02687 bb_coords = NULL; 02688 02689 free_unique_pin_list(); 02690 02691 if(place_cost_type == NONLINEAR_CONG) 02692 { 02693 free_place_regions(num_regions); 02694 free_matrix(old_region_occ_x, 0, num_regions - 1, 0, 02695 sizeof(float)); 02696 free_matrix(old_region_occ_y, 0, num_regions - 1, 0, 02697 sizeof(float)); 02698 } 02699 02700 else if(place_cost_type == LINEAR_CONG) 02701 { 02702 free_fast_cost_update_structs(); 02703 } 02704 }
static void free_unique_pin_list | ( | void | ) | [static] |
Definition at line 2977 of file place.c.
02978 { 02979 02980 /* Frees the unique pin list structures. */ 02981 02982 int any_dup, inet; 02983 02984 any_dup = 0; 02985 02986 for(inet = 0; inet < num_nets; inet++) 02987 { 02988 if(duplicate_pins[inet] != 0) 02989 { 02990 free(unique_pin_list[inet]); 02991 any_dup = 1; 02992 } 02993 } 02994 02995 if(any_dup != 0) 02996 free(unique_pin_list); 02997 02998 free(duplicate_pins); 02999 }
static void get_bb_from_scratch | ( | int | inet, | |
struct s_bb * | coords, | |||
struct s_bb * | num_on_edges | |||
) | [static] |
Definition at line 3076 of file place.c.
03079 { 03080 03081 /* This routine finds the bounding box of each net from scratch (i.e. * 03082 * from only the block location information). It updates both the * 03083 * coordinate and number of blocks on each edge information. It * 03084 * should only be called when the bounding box information is not valid. */ 03085 03086 int ipin, bnum, x, y, xmin, xmax, ymin, ymax; 03087 int xmin_edge, xmax_edge, ymin_edge, ymax_edge; 03088 int n_pins; 03089 int *plist; 03090 03091 /* I need a list of blocks to which this net connects, with no block listed * 03092 * more than once, in order to get a proper count of the number on the edge * 03093 * of the bounding box. */ 03094 03095 if(duplicate_pins[inet] == 0) 03096 { 03097 plist = net[inet].node_block; 03098 n_pins = net[inet].num_sinks + 1; 03099 } 03100 else 03101 { 03102 plist = unique_pin_list[inet]; 03103 n_pins = (net[inet].num_sinks + 1) - duplicate_pins[inet]; 03104 } 03105 03106 x = block[plist[0]].x; 03107 y = block[plist[0]].y; 03108 03109 x = max(min(x, nx), 1); 03110 y = max(min(y, ny), 1); 03111 03112 xmin = x; 03113 ymin = y; 03114 xmax = x; 03115 ymax = y; 03116 xmin_edge = 1; 03117 ymin_edge = 1; 03118 xmax_edge = 1; 03119 ymax_edge = 1; 03120 03121 for(ipin = 1; ipin < n_pins; ipin++) 03122 { 03123 03124 bnum = plist[ipin]; 03125 x = block[bnum].x; 03126 y = block[bnum].y; 03127 03128 /* Code below counts IO blocks as being within the 1..nx, 1..ny clb array. * 03129 * This is because channels do not go out of the 0..nx, 0..ny range, and * 03130 * I always take all channels impinging on the bounding box to be within * 03131 * that bounding box. Hence, this "movement" of IO blocks does not affect * 03132 * the which channels are included within the bounding box, and it * 03133 * simplifies the code a lot. */ 03134 03135 x = max(min(x, nx), 1); 03136 y = max(min(y, ny), 1); 03137 03138 if(x == xmin) 03139 { 03140 xmin_edge++; 03141 } 03142 if(x == xmax) 03143 { /* Recall that xmin could equal xmax -- don't use else */ 03144 xmax_edge++; 03145 } 03146 else if(x < xmin) 03147 { 03148 xmin = x; 03149 xmin_edge = 1; 03150 } 03151 else if(x > xmax) 03152 { 03153 xmax = x; 03154 xmax_edge = 1; 03155 } 03156 03157 if(y == ymin) 03158 { 03159 ymin_edge++; 03160 } 03161 if(y == ymax) 03162 { 03163 ymax_edge++; 03164 } 03165 else if(y < ymin) 03166 { 03167 ymin = y; 03168 ymin_edge = 1; 03169 } 03170 else if(y > ymax) 03171 { 03172 ymax = y; 03173 ymax_edge = 1; 03174 } 03175 } 03176 03177 /* Copy the coordinates and number on edges information into the proper * 03178 * structures. */ 03179 03180 coords->xmin = xmin; 03181 coords->xmax = xmax; 03182 coords->ymin = ymin; 03183 coords->ymax = ymax; 03184 03185 num_on_edges->xmin = xmin_edge; 03186 num_on_edges->xmax = xmax_edge; 03187 num_on_edges->ymin = ymin_edge; 03188 num_on_edges->ymax = ymax_edge; 03189 }
static float get_net_cost | ( | int | inet, | |
struct s_bb * | bb_ptr | |||
) | [static] |
Definition at line 3237 of file place.c.
03239 { 03240 03241 /* Finds the cost due to one net by looking at its coordinate bounding * 03242 * box. */ 03243 03244 float ncost, crossing; 03245 03246 /* Get the expected "crossing count" of a net, based on its number * 03247 * of pins. Extrapolate for very large nets. */ 03248 03249 if((net[inet].num_sinks + 1) > 50) 03250 { 03251 crossing = 2.7933 + 0.02616 * ((net[inet].num_sinks + 1) - 50); 03252 /* crossing = 3.0; Old value */ 03253 } 03254 else 03255 { 03256 crossing = cross_count[(net[inet].num_sinks + 1) - 1]; 03257 } 03258 03259 /* Could insert a check for xmin == xmax. In that case, assume * 03260 * connection will be made with no bends and hence no x-cost. * 03261 * Same thing for y-cost. */ 03262 03263 /* Cost = wire length along channel * cross_count / average * 03264 * channel capacity. Do this for x, then y direction and add. */ 03265 03266 ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing * 03267 chanx_place_cost_fac[bbptr->ymax][bbptr->ymin - 1]; 03268 03269 ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing * 03270 chany_place_cost_fac[bbptr->xmax][bbptr->xmin - 1]; 03271 03272 return (ncost); 03273 }
static double get_net_wirelength_estimate | ( | int | inet, | |
struct s_bb * | bbptr | |||
) | [static] |
Definition at line 3193 of file place.c.
03195 { 03196 03197 /* WMF: Finds the estimate of wirelength due to one net by looking at * 03198 * its coordinate bounding box. */ 03199 03200 double ncost, crossing; 03201 03202 /* Get the expected "crossing count" of a net, based on its number * 03203 * of pins. Extrapolate for very large nets. */ 03204 03205 if(((net[inet].num_sinks + 1) > 50) && ((net[inet].num_sinks + 1) < 85)) 03206 { 03207 crossing = 2.7933 + 0.02616 * ((net[inet].num_sinks + 1) - 50); 03208 } 03209 else if((net[inet].num_sinks + 1) >= 85) 03210 { 03211 crossing = 03212 2.7933 + 0.011 * (net[inet].num_sinks + 1) - 03213 0.0000018 * (net[inet].num_sinks + 1) * (net[inet].num_sinks + 03214 1); 03215 } 03216 else 03217 { 03218 crossing = cross_count[(net[inet].num_sinks + 1) - 1]; 03219 } 03220 03221 /* Could insert a check for xmin == xmax. In that case, assume * 03222 * connection will be made with no bends and hence no x-cost. * 03223 * Same thing for y-cost. */ 03224 03225 /* Cost = wire length along channel * cross_count / average * 03226 * channel capacity. Do this for x, then y direction and add. */ 03227 03228 ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing; 03229 03230 ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing; 03231 03232 return (ncost); 03233 }
static void get_non_updateable_bb | ( | int | inet, | |
struct s_bb * | bb_coord_new | |||
) | [static] |
Definition at line 3277 of file place.c.
03279 { 03280 03281 /* Finds the bounding box of a net and stores its coordinates in the * 03282 * bb_coord_new data structure. This routine should only be called * 03283 * for small nets, since it does not determine enough information for * 03284 * the bounding box to be updated incrementally later. * 03285 * Currently assumes channels on both sides of the CLBs forming the * 03286 * edges of the bounding box can be used. Essentially, I am assuming * 03287 * the pins always lie on the outside of the bounding box. */ 03288 03289 03290 int k, xmax, ymax, xmin, ymin, x, y; 03291 03292 x = block[net[inet].node_block[0]].x; 03293 y = block[net[inet].node_block[0]].y; 03294 03295 xmin = x; 03296 ymin = y; 03297 xmax = x; 03298 ymax = y; 03299 03300 for(k = 1; k < (net[inet].num_sinks + 1); k++) 03301 { 03302 x = block[net[inet].node_block[k]].x; 03303 y = block[net[inet].node_block[k]].y; 03304 03305 if(x < xmin) 03306 { 03307 xmin = x; 03308 } 03309 else if(x > xmax) 03310 { 03311 xmax = x; 03312 } 03313 03314 if(y < ymin) 03315 { 03316 ymin = y; 03317 } 03318 else if(y > ymax) 03319 { 03320 ymax = y; 03321 } 03322 } 03323 03324 /* Now I've found the coordinates of the bounding box. There are no * 03325 * channels beyond nx and ny, so I want to clip to that. As well, * 03326 * since I'll always include the channel immediately below and the * 03327 * channel immediately to the left of the bounding box, I want to * 03328 * clip to 1 in both directions as well (since minimum channel index * 03329 * is 0). See route.c for a channel diagram. */ 03330 03331 bb_coord_new->xmin = max(min(xmin, nx), 1); 03332 bb_coord_new->ymin = max(min(ymin, ny), 1); 03333 bb_coord_new->xmax = max(min(xmax, nx), 1); 03334 bb_coord_new->ymax = max(min(ymax, ny), 1); 03335 }
static double get_std_dev | ( | int | n, | |
double | sum_x_squared, | |||
double | av_x | |||
) | [static] |
Definition at line 1031 of file place.c.
01034 { 01035 01036 /* Returns the standard deviation of data set x. There are n sample points, * 01037 * sum_x_squared is the summation over n of x^2 and av_x is the average x. * 01038 * All operations are done in double precision, since round off error can be * 01039 * a problem in the initial temp. std_dev calculation for big circuits. */ 01040 01041 double std_dev; 01042 01043 if(n <= 1) 01044 std_dev = 0.; 01045 else 01046 std_dev = (sum_x_squared - n * av_x * av_x) / (double)(n - 1); 01047 01048 if(std_dev > 0.) /* Very small variances sometimes round negative */ 01049 std_dev = sqrt(std_dev); 01050 else 01051 std_dev = 0.; 01052 01053 return (std_dev); 01054 }
static void initial_placement | ( | enum e_pad_loc_type | pad_loc_type, | |
char * | pad_loc_file | |||
) | [static] |
Definition at line 3582 of file place.c.
03584 { 03585 03586 /* Randomly places the blocks to create an initial placement. */ 03587 struct s_pos 03588 { 03589 int x; 03590 int y; 03591 int z; 03592 } 03593 **pos; /* [0..num_types-1][0..type_tsize - 1] */ 03594 int i, j, k, iblk, choice, type_index, x, y, z; 03595 int *count, *index; /* [0..num_types-1] */ 03596 03597 pos = (struct s_pos **)my_malloc(num_types * sizeof(struct s_pos *)); 03598 count = (int *)my_calloc(num_types, sizeof(int)); 03599 index = (int *)my_calloc(num_types, sizeof(int)); 03600 03601 /* Initialize all occupancy to zero. */ 03602 03603 for(i = 0; i <= nx + 1; i++) 03604 { 03605 for(j = 0; j <= ny + 1; j++) 03606 { 03607 grid[i][j].usage = 0; 03608 for(k = 0; k < grid[i][j].type->capacity; k++) 03609 { 03610 grid[i][j].blocks[k] = EMPTY; 03611 if(grid[i][j].offset == 0) 03612 { 03613 count[grid[i][j].type->index]++; 03614 } 03615 } 03616 } 03617 } 03618 03619 for(i = 0; i < num_types; i++) 03620 { 03621 pos[i] = 03622 (struct s_pos *)my_malloc(count[i] * sizeof(struct s_pos)); 03623 } 03624 03625 for(i = 0; i <= nx + 1; i++) 03626 { 03627 for(j = 0; j <= ny + 1; j++) 03628 { 03629 for(k = 0; k < grid[i][j].type->capacity; k++) 03630 { 03631 if(grid[i][j].offset == 0) 03632 { 03633 type_index = grid[i][j].type->index; 03634 pos[type_index][index[type_index]].x = i; 03635 pos[type_index][index[type_index]].y = j; 03636 pos[type_index][index[type_index]].z = k; 03637 index[type_index]++; 03638 } 03639 } 03640 } 03641 } 03642 03643 for(iblk = 0; iblk < num_blocks; iblk++) 03644 { 03645 /* Don't do IOs if the user specifies IOs */ 03646 if(!(block[iblk].type == IO_TYPE && pad_loc_type == USER)) 03647 { 03648 type_index = block[iblk].type->index; 03649 assert(count[type_index] > 0); 03650 choice = my_irand(count[type_index] - 1); 03651 x = pos[type_index][choice].x; 03652 y = pos[type_index][choice].y; 03653 z = pos[type_index][choice].z; 03654 grid[x][y].blocks[z] = iblk; 03655 grid[x][y].usage++; 03656 03657 /* Ensure randomizer doesn't pick this block again */ 03658 pos[type_index][choice] = pos[type_index][count[type_index] - 1]; /* overwrite used block position */ 03659 count[type_index]--; 03660 } 03661 } 03662 03663 if(pad_loc_type == USER) 03664 { 03665 read_user_pad_loc(pad_loc_file); 03666 } 03667 03668 /* All the blocks are placed now. Make the block array agree with the * 03669 * clb array. */ 03670 03671 for(i = 0; i <= (nx + 1); i++) 03672 { 03673 for(j = 0; j <= (ny + 1); j++) 03674 { 03675 for(k = 0; k < grid[i][j].type->capacity; k++) 03676 { 03677 assert(grid[i][j].blocks != NULL); 03678 iblk = grid[i][j].blocks[k]; 03679 03680 if(iblk != EMPTY) 03681 { 03682 block[iblk].x = i; 03683 block[iblk].y = j; 03684 block[iblk].z = k; 03685 } 03686 } 03687 } 03688 } 03689 03690 #ifdef VERBOSE 03691 printf("At end of initial_placement.\n"); 03692 dump_clbs(); 03693 #endif 03694 for(i = 0; i < num_types; i++) 03695 { 03696 free(pos[i]); 03697 } 03698 free(pos); /* Free the mapping list */ 03699 free(index); 03700 free(count); 03701 }
static void load_place_regions | ( | int | num_regions | ) | [static] |
Definition at line 2858 of file place.c.
02859 { 02860 02861 /* Loads the capacity values in each direction for each of the placement * 02862 * regions. The chip is divided into a num_regions x num_regions array. */ 02863 02864 int i, j, low_block, high_block, rnum; 02865 float low_lim, high_lim, capacity, fac, block_capacity; 02866 float len_fac, height_fac; 02867 02868 /* First load up horizontal channel capacities. */ 02869 02870 for(j = 0; j < num_regions; j++) 02871 { 02872 capacity = 0.; 02873 low_lim = (float)j / (float)num_regions *ny + 1.; 02874 high_lim = (float)(j + 1) / (float)num_regions *ny; 02875 02876 low_block = floor(low_lim); 02877 low_block = max(1, low_block); /* Watch for weird roundoff effects. */ 02878 high_block = ceil(high_lim); 02879 high_block = min(high_block, ny); 02880 02881 block_capacity = (chan_width_x[low_block - 1] + 02882 chan_width_x[low_block]) / 2.; 02883 if(low_block == 1) 02884 block_capacity += chan_width_x[0] / 2.; 02885 02886 fac = 1. - (low_lim - low_block); 02887 capacity += fac * block_capacity; 02888 02889 for(rnum = low_block + 1; rnum < high_block; rnum++) 02890 { 02891 block_capacity = 02892 (chan_width_x[rnum - 1] + chan_width_x[rnum]) / 2.; 02893 capacity += block_capacity; 02894 } 02895 02896 block_capacity = (chan_width_x[high_block - 1] + 02897 chan_width_x[high_block]) / 2.; 02898 if(high_block == ny) 02899 block_capacity += chan_width_x[ny] / 2.; 02900 02901 fac = 1. - (high_block - high_lim); 02902 capacity += fac * block_capacity; 02903 02904 for(i = 0; i < num_regions; i++) 02905 { 02906 place_region_x[i][j].capacity = capacity; 02907 place_region_x[i][j].inv_capacity = 1. / capacity; 02908 place_region_x[i][j].occupancy = 0.; 02909 place_region_x[i][j].cost = 0.; 02910 } 02911 } 02912 02913 /* Now load vertical channel capacities. */ 02914 02915 for(i = 0; i < num_regions; i++) 02916 { 02917 capacity = 0.; 02918 low_lim = (float)i / (float)num_regions *nx + 1.; 02919 high_lim = (float)(i + 1) / (float)num_regions *nx; 02920 02921 low_block = floor(low_lim); 02922 low_block = max(1, low_block); /* Watch for weird roundoff effects. */ 02923 high_block = ceil(high_lim); 02924 high_block = min(high_block, nx); 02925 02926 block_capacity = (chan_width_y[low_block - 1] + 02927 chan_width_y[low_block]) / 2.; 02928 if(low_block == 1) 02929 block_capacity += chan_width_y[0] / 2.; 02930 02931 fac = 1. - (low_lim - low_block); 02932 capacity += fac * block_capacity; 02933 02934 for(rnum = low_block + 1; rnum < high_block; rnum++) 02935 { 02936 block_capacity = 02937 (chan_width_y[rnum - 1] + chan_width_y[rnum]) / 2.; 02938 capacity += block_capacity; 02939 } 02940 02941 block_capacity = (chan_width_y[high_block - 1] + 02942 chan_width_y[high_block]) / 2.; 02943 if(high_block == nx) 02944 block_capacity += chan_width_y[nx] / 2.; 02945 02946 fac = 1. - (high_block - high_lim); 02947 capacity += fac * block_capacity; 02948 02949 for(j = 0; j < num_regions; j++) 02950 { 02951 place_region_y[i][j].capacity = capacity; 02952 place_region_y[i][j].inv_capacity = 1. / capacity; 02953 place_region_y[i][j].occupancy = 0.; 02954 place_region_y[i][j].cost = 0.; 02955 } 02956 } 02957 02958 /* Finally set up the arrays indicating the limits of each of the * 02959 * placement subregions. */ 02960 02961 len_fac = (float)nx / (float)num_regions; 02962 height_fac = (float)ny / (float)num_regions; 02963 02964 place_region_bounds_x[0] = 0.5; 02965 place_region_bounds_y[0] = 0.5; 02966 02967 for(i = 1; i <= num_regions; i++) 02968 { 02969 place_region_bounds_x[i] = place_region_bounds_x[i - 1] + len_fac; 02970 place_region_bounds_y[i] = 02971 place_region_bounds_y[i - 1] + height_fac; 02972 } 02973 }
static float nonlinear_cong_cost | ( | int | num_regions | ) | [static] |
Definition at line 2449 of file place.c.
02450 { 02451 02452 /* This routine computes the cost of a placement when the NONLINEAR_CONG * 02453 * option is selected. It assumes that the occupancies of all the * 02454 * placement subregions have been properly updated, and simply * 02455 * computes the cost due to these occupancies by summing over all * 02456 * subregions. This will be inefficient for moves that don't affect * 02457 * many subregions (i.e. small moves late in placement), esp. when there * 02458 * are a lot of subregions. May recode later to update only affected * 02459 * subregions. */ 02460 02461 float cost, tmp; 02462 int i, j; 02463 02464 cost = 0.; 02465 02466 for(i = 0; i < num_regions; i++) 02467 { 02468 for(j = 0; j < num_regions; j++) 02469 { 02470 02471 /* Many different cost metrics possible. 1st try: */ 02472 02473 if(place_region_x[i][j].occupancy < 02474 place_region_x[i][j].capacity) 02475 { 02476 cost += place_region_x[i][j].occupancy * 02477 place_region_x[i][j].inv_capacity; 02478 } 02479 else 02480 { /* Overused region -- penalize. */ 02481 02482 tmp = place_region_x[i][j].occupancy * 02483 place_region_x[i][j].inv_capacity; 02484 cost += tmp * tmp; 02485 } 02486 02487 if(place_region_y[i][j].occupancy < 02488 place_region_y[i][j].capacity) 02489 { 02490 cost += place_region_y[i][j].occupancy * 02491 place_region_y[i][j].inv_capacity; 02492 } 02493 else 02494 { /* Overused region -- penalize. */ 02495 02496 tmp = place_region_y[i][j].occupancy * 02497 place_region_y[i][j].inv_capacity; 02498 cost += tmp * tmp; 02499 } 02500 02501 } 02502 } 02503 02504 return (cost); 02505 }
static float recompute_bb_cost | ( | int | place_cost_type, | |
int | num_regions | |||
) | [static] |
Definition at line 1908 of file place.c.
01910 { 01911 01912 /* Recomputes the cost to eliminate roundoff that may have accrued. * 01913 * This routine does as little work as possible to compute this new * 01914 * cost. */ 01915 01916 int i, j, inet; 01917 float cost; 01918 01919 cost = 0; 01920 01921 /* Initialize occupancies to zero if regions are being used. */ 01922 01923 if(place_cost_type == NONLINEAR_CONG) 01924 { 01925 for(i = 0; i < num_regions; i++) 01926 { 01927 for(j = 0; j < num_regions; j++) 01928 { 01929 place_region_x[i][j].occupancy = 0.; 01930 place_region_y[i][j].occupancy = 0.; 01931 } 01932 } 01933 } 01934 01935 for(inet = 0; inet < num_nets; inet++) 01936 { /* for each net ... */ 01937 01938 if(net[inet].is_global == FALSE) 01939 { /* Do only if not global. */ 01940 01941 /* Bounding boxes don't have to be recomputed; they're correct. */ 01942 01943 if(place_cost_type != NONLINEAR_CONG) 01944 { 01945 cost += net_cost[inet]; 01946 } 01947 else 01948 { /* Must be nonlinear_cong case. */ 01949 update_region_occ(inet, &bb_coords[inet], 1, 01950 num_regions); 01951 } 01952 } 01953 } 01954 01955 if(place_cost_type == NONLINEAR_CONG) 01956 { 01957 cost = nonlinear_cong_cost(num_regions); 01958 } 01959 01960 return (cost); 01961 }
static void restore_region_occ | ( | float ** | old_region_occ_x, | |
float ** | old_region_occ_y, | |||
int | num_regions | |||
) | [static] |
Definition at line 1564 of file place.c.
01567 { 01568 01569 /* Restores the old occupancies of the placement subregions when the * 01570 * current move is not accepted. Used only for NONLINEAR_CONG. */ 01571 01572 int i, j; 01573 01574 for(i = 0; i < num_regions; i++) 01575 { 01576 for(j = 0; j < num_regions; j++) 01577 { 01578 place_region_x[i][j].occupancy = old_region_occ_x[i][j]; 01579 place_region_y[i][j].occupancy = old_region_occ_y[i][j]; 01580 } 01581 } 01582 }
static void save_region_occ | ( | float ** | old_region_occ_x, | |
float ** | old_region_occ_y, | |||
int | num_regions | |||
) | [static] |
Definition at line 1542 of file place.c.
01545 { 01546 01547 /* Saves the old occupancies of the placement subregions in case the * 01548 * current move is not accepted. Used only for NONLINEAR_CONG. */ 01549 01550 int i, j; 01551 01552 for(i = 0; i < num_regions; i++) 01553 { 01554 for(j = 0; j < num_regions; j++) 01555 { 01556 old_region_occ_x[i][j] = place_region_x[i][j].occupancy; 01557 old_region_occ_y[i][j] = place_region_y[i][j].occupancy; 01558 } 01559 } 01560 }
static float starting_t | ( | float * | cost_ptr, | |
float * | bb_cost_ptr, | |||
float * | timing_cost_ptr, | |||
int | place_cost_type, | |||
float ** | old_region_occ_x, | |||
float ** | old_region_occ_y, | |||
int | num_regions, | |||
boolean | fixed_pins, | |||
struct s_annealing_sched | annealing_sched, | |||
int | max_moves, | |||
float | rlim, | |||
enum e_place_algorithm | place_algorithm, | |||
float | timing_tradeoff, | |||
float | inverse_prev_bb_cost, | |||
float | inverse_prev_timing_cost, | |||
float * | delay_cost_ptr | |||
) | [static] |
Definition at line 1164 of file place.c.
01180 { 01181 01182 /* Finds the starting temperature (hot condition). */ 01183 01184 int i, num_accepted, move_lim; 01185 double std_dev, av, sum_of_squares; /* Double important to avoid round off */ 01186 int *x_lookup; 01187 01188 x_lookup = (int *)my_malloc(nx * sizeof(int)); 01189 01190 if(annealing_sched.type == USER_SCHED) 01191 return (annealing_sched.init_t); 01192 01193 move_lim = min(max_moves, num_blocks); 01194 01195 num_accepted = 0; 01196 av = 0.; 01197 sum_of_squares = 0.; 01198 01199 /* Try one move per block. Set t high so essentially all accepted. */ 01200 01201 for(i = 0; i < move_lim; i++) 01202 { 01203 if(try_swap(1.e30, cost_ptr, bb_cost_ptr, timing_cost_ptr, rlim, 01204 place_cost_type, 01205 old_region_occ_x, old_region_occ_y, num_regions, 01206 fixed_pins, place_algorithm, timing_tradeoff, 01207 inverse_prev_bb_cost, inverse_prev_timing_cost, 01208 delay_cost_ptr, x_lookup) == 1) 01209 { 01210 num_accepted++; 01211 av += *cost_ptr; 01212 sum_of_squares += *cost_ptr * (*cost_ptr); 01213 } 01214 } 01215 01216 if(num_accepted != 0) 01217 av /= num_accepted; 01218 else 01219 av = 0.; 01220 01221 std_dev = get_std_dev(num_accepted, sum_of_squares, av); 01222 01223 #ifdef DEBUG 01224 if(num_accepted != move_lim) 01225 { 01226 printf 01227 ("Warning: Starting t: %d of %d configurations accepted.\n", 01228 num_accepted, move_lim); 01229 } 01230 #endif 01231 01232 #ifdef VERBOSE 01233 printf("std_dev: %g, average cost: %g, starting temp: %g\n", 01234 std_dev, av, 20. * std_dev); 01235 #endif 01236 01237 free(x_lookup); 01238 01239 /* Set the initial temperature to 20 times the standard of deviation */ 01240 /* so that the initial temperature adjusts according to the circuit */ 01241 return (20. * std_dev); 01242 }
void try_place | ( | struct s_placer_opts | placer_opts, | |
struct s_annealing_sched | annealing_sched, | |||
t_chan_width_dist | chan_width_dist, | |||
struct s_router_opts | router_opts, | |||
struct s_det_routing_arch | det_routing_arch, | |||
t_segment_inf * | segment_inf, | |||
t_timing_inf | timing_inf, | |||
t_subblock_data * | subblock_data_ptr, | |||
t_mst_edge *** | mst | |||
) |
Definition at line 296 of file place.c.
00305 { 00306 00307 /* Does almost all the work of placing a circuit. Width_fac gives the * 00308 * width of the widest channel. Place_cost_exp says what exponent the * 00309 * width should be taken to when calculating costs. This allows a * 00310 * greater bias for anisotropic architectures. Place_cost_type * 00311 * determines which cost function is used. num_regions is used only * 00312 * the place_cost_type is NONLINEAR_CONG. */ 00313 00314 00315 int tot_iter, inner_iter, success_sum; 00316 int move_lim, moves_since_cost_recompute, width_fac; 00317 float t, success_rat, rlim, d_max, est_crit; 00318 float cost, timing_cost, bb_cost, new_bb_cost, new_timing_cost; 00319 float delay_cost, new_delay_cost, place_delay_value; 00320 float inverse_prev_bb_cost, inverse_prev_timing_cost; 00321 float oldt; 00322 double av_cost, av_bb_cost, av_timing_cost, av_delay_cost, 00323 sum_of_squares, std_dev; 00324 float **old_region_occ_x, **old_region_occ_y; 00325 char msg[BUFSIZE]; 00326 boolean fixed_pins; /* Can pads move or not? */ 00327 int num_connections; 00328 int inet, ipin, outer_crit_iter_count, inner_crit_iter_count, 00329 inner_recompute_limit; 00330 float **net_slack, **net_delay; 00331 float crit_exponent; 00332 float first_rlim, final_rlim, inverse_delta_rlim; 00333 float **remember_net_delay_original_ptr; /*used to free net_delay if it is re-assigned */ 00334 00335 int *x_lookup; /* Used to quickly determine valid swap columns */ 00336 00337 /* Allocated here because it goes into timing critical code where each memory allocation is expensive */ 00338 x_lookup = my_malloc(nx * sizeof(int)); 00339 00340 remember_net_delay_original_ptr = NULL; /*prevents compiler warning */ 00341 00342 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00343 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || 00344 placer_opts.enable_timing_computations) 00345 { 00346 /*do this before the initial placement to avoid messing up the initial placement */ 00347 alloc_lookups_and_criticalities(chan_width_dist, 00348 router_opts, 00349 det_routing_arch, 00350 segment_inf, 00351 timing_inf, 00352 *subblock_data_ptr, 00353 &net_delay, &net_slack); 00354 00355 remember_net_delay_original_ptr = net_delay; 00356 00357 /*#define PRINT_LOWER_BOUND */ 00358 #ifdef PRINT_LOWER_BOUND 00359 /*print the crit_path, assuming delay between blocks that are* 00360 *block_dist apart*/ 00361 00362 if(placer_opts.block_dist <= nx) 00363 place_delay_value = 00364 delta_clb_to_clb[placer_opts.block_dist][0]; 00365 else if(placer_opts.block_dist <= ny) 00366 place_delay_value = 00367 delta_clb_to_clb[0][placer_opts.block_dist]; 00368 else 00369 place_delay_value = delta_clb_to_clb[nx][ny]; 00370 00371 printf("\nLower bound assuming delay of %g\n", place_delay_value); 00372 00373 load_constant_net_delay(net_delay, place_delay_value); 00374 load_timing_graph_net_delays(net_delay); 00375 d_max = load_net_slack(net_slack, 0); 00376 00377 #ifdef CREATE_ECHO_FILES 00378 print_critical_path("Placement_Lower_Bound.echo"); 00379 print_sink_delays("Placement_Lower_Bound_Sink_Delays.echo"); 00380 #endif /* CREATE_ECHO_FILES */ 00381 00382 /*also print sink delays assuming 0 delay between blocks, 00383 * this tells us how much logic delay is on each path */ 00384 00385 load_constant_net_delay(net_delay, 0); 00386 load_timing_graph_net_delays(net_delay); 00387 d_max = load_net_slack(net_slack, 0); 00388 00389 #ifdef CREATE_ECHO_FILES 00390 print_sink_delays("Placement_Logic_Sink_Delays.echo"); 00391 #endif /* CREATE_ECHO_FILES */ 00392 #endif 00393 00394 } 00395 00396 width_fac = placer_opts.place_chan_width; 00397 if(placer_opts.pad_loc_type == FREE) 00398 fixed_pins = FALSE; 00399 else 00400 fixed_pins = TRUE; 00401 00402 init_chan(width_fac, chan_width_dist); 00403 00404 alloc_and_load_placement_structs(placer_opts.place_cost_type, 00405 placer_opts.num_regions, 00406 placer_opts.place_cost_exp, 00407 &old_region_occ_x, &old_region_occ_y, 00408 placer_opts); 00409 00410 initial_placement(placer_opts.pad_loc_type, placer_opts.pad_loc_file); 00411 init_draw_coords((float)width_fac); 00412 00413 /* Storing the number of pins on each type of block makes the swap routine * 00414 * slightly more efficient. */ 00415 00416 /* Gets initial cost and loads bounding boxes. */ 00417 00418 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00419 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) 00420 { 00421 bb_cost = comp_bb_cost(NORMAL, placer_opts.place_cost_type, 00422 placer_opts.num_regions); 00423 00424 crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */ 00425 00426 compute_net_pin_index_values(); 00427 00428 num_connections = count_connections(); 00429 printf 00430 ("\nThere are %d point to point connections in this circuit\n\n", 00431 num_connections); 00432 00433 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) 00434 { 00435 for(inet = 0; inet < num_nets; inet++) 00436 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++) 00437 timing_place_crit[inet][ipin] = 0; /*dummy crit values */ 00438 00439 comp_td_costs(&timing_cost, &delay_cost); /*first pass gets delay_cost, which is used 00440 * in criticality computations in the next call 00441 * to comp_td_costs. */ 00442 place_delay_value = delay_cost / num_connections; /*used for computing criticalities */ 00443 load_constant_net_delay(net_delay, place_delay_value); 00444 00445 } 00446 else 00447 place_delay_value = 0; 00448 00449 00450 if(placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) 00451 { 00452 net_delay = point_to_point_delay_cost; /*this keeps net_delay up to date with * 00453 * *the same values that the placer is using * 00454 * *point_to_point_delay_cost is computed each* 00455 * *time that comp_td_costs is called, and is * 00456 * *also updated after any swap is accepted */ 00457 } 00458 00459 00460 load_timing_graph_net_delays(net_delay); 00461 d_max = load_net_slack(net_slack, 0); 00462 load_criticalities(placer_opts, net_slack, d_max, crit_exponent); 00463 outer_crit_iter_count = 1; 00464 00465 /*now we can properly compute costs */ 00466 comp_td_costs(&timing_cost, &delay_cost); /*also puts proper values into point_to_point_delay_cost */ 00467 00468 inverse_prev_timing_cost = 1 / timing_cost; 00469 inverse_prev_bb_cost = 1 / bb_cost; 00470 cost = 1; /*our new cost function uses normalized values of */ 00471 /*bb_cost and timing_cost, the value of cost will be reset */ 00472 /*to 1 at each temperature when *_TIMING_DRIVEN_PLACE is true */ 00473 } 00474 else 00475 { /*BOUNDING_BOX_PLACE */ 00476 cost = bb_cost = comp_bb_cost(NORMAL, placer_opts.place_cost_type, 00477 placer_opts.num_regions); 00478 timing_cost = 0; 00479 delay_cost = 0; 00480 place_delay_value = 0; 00481 outer_crit_iter_count = 0; 00482 num_connections = 0; 00483 d_max = 0; 00484 crit_exponent = 0; 00485 00486 inverse_prev_timing_cost = 0; /*inverses not used */ 00487 inverse_prev_bb_cost = 0; 00488 } 00489 00490 move_lim = (int)(annealing_sched.inner_num * pow(num_blocks, 1.3333)); 00491 00492 if(placer_opts.inner_loop_recompute_divider != 0) 00493 inner_recompute_limit = (int)(0.5 + (float)move_lim / 00494 (float)placer_opts. 00495 inner_loop_recompute_divider); 00496 else /*don't do an inner recompute */ 00497 inner_recompute_limit = move_lim + 1; 00498 00499 00500 /* Sometimes I want to run the router with a random placement. Avoid * 00501 * using 0 moves to stop division by 0 and 0 length vector problems, * 00502 * by setting move_lim to 1 (which is still too small to do any * 00503 * significant optimization). */ 00504 00505 if(move_lim <= 0) 00506 move_lim = 1; 00507 00508 rlim = (float)max(nx, ny); 00509 00510 first_rlim = rlim; /*used in timing-driven placement for exponent computation */ 00511 final_rlim = 1; 00512 inverse_delta_rlim = 1 / (first_rlim - final_rlim); 00513 00514 t = starting_t(&cost, &bb_cost, &timing_cost, 00515 placer_opts.place_cost_type, 00516 old_region_occ_x, old_region_occ_y, 00517 placer_opts.num_regions, fixed_pins, annealing_sched, 00518 move_lim, rlim, placer_opts.place_algorithm, 00519 placer_opts.timing_tradeoff, inverse_prev_bb_cost, 00520 inverse_prev_timing_cost, &delay_cost); 00521 tot_iter = 0; 00522 moves_since_cost_recompute = 0; 00523 printf 00524 ("Initial Placement Cost: %g bb_cost: %g td_cost: %g delay_cost: %g\n\n", 00525 cost, bb_cost, timing_cost, delay_cost); 00526 00527 #ifndef SPEC 00528 printf 00529 ("%11s %10s %11s %11s %11s %11s %11s %9s %8s %7s %7s %10s %7s\n", 00530 "T", "Cost", "Av. BB Cost", "Av. TD Cost", "Av Tot Del", 00531 "P to P Del", "d_max", "Ac Rate", "Std Dev", "R limit", "Exp", 00532 "Tot. Moves", "Alpha"); 00533 printf 00534 ("%11s %10s %11s %11s %11s %11s %11s %9s %8s %7s %7s %10s %7s\n", 00535 "--------", "----------", "-----------", "-----------", 00536 "---------", "----------", "-----", "-------", "-------", 00537 "-------", "-------", "----------", "-----"); 00538 #endif 00539 00540 sprintf(msg, 00541 "Initial Placement. Cost: %g BB Cost: %g TD Cost %g Delay Cost: %g " 00542 "\t d_max %g Channel Factor: %d", cost, bb_cost, timing_cost, 00543 delay_cost, d_max, width_fac); 00544 update_screen(MAJOR, msg, PLACEMENT, FALSE); 00545 00546 while(exit_crit(t, cost, annealing_sched) == 0) 00547 { 00548 00549 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00550 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) 00551 { 00552 cost = 1; 00553 } 00554 00555 av_cost = 0.; 00556 av_bb_cost = 0.; 00557 av_delay_cost = 0.; 00558 av_timing_cost = 0.; 00559 sum_of_squares = 0.; 00560 success_sum = 0; 00561 00562 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00563 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) 00564 { 00565 00566 if(outer_crit_iter_count >= 00567 placer_opts.recompute_crit_iter 00568 || placer_opts.inner_loop_recompute_divider != 0) 00569 { 00570 #ifdef VERBOSE 00571 printf("Outer Loop Recompute Criticalities\n"); 00572 #endif 00573 place_delay_value = delay_cost / num_connections; 00574 00575 if(placer_opts.place_algorithm == 00576 NET_TIMING_DRIVEN_PLACE) 00577 load_constant_net_delay(net_delay, 00578 place_delay_value); 00579 /*note, for path_based, the net delay is not updated since it is current, 00580 *because it accesses point_to_point_delay array */ 00581 00582 load_timing_graph_net_delays(net_delay); 00583 d_max = load_net_slack(net_slack, 0); 00584 load_criticalities(placer_opts, net_slack, d_max, 00585 crit_exponent); 00586 /*recompute costs from scratch, based on new criticalities */ 00587 comp_td_costs(&timing_cost, &delay_cost); 00588 outer_crit_iter_count = 0; 00589 } 00590 outer_crit_iter_count++; 00591 00592 /*at each temperature change we update these values to be used */ 00593 /*for normalizing the tradeoff between timing and wirelength (bb) */ 00594 inverse_prev_bb_cost = 1 / bb_cost; 00595 inverse_prev_timing_cost = 1 / timing_cost; 00596 } 00597 00598 inner_crit_iter_count = 1; 00599 00600 for(inner_iter = 0; inner_iter < move_lim; inner_iter++) 00601 { 00602 if(try_swap(t, &cost, &bb_cost, &timing_cost, 00603 rlim, placer_opts.place_cost_type, 00604 old_region_occ_x, old_region_occ_y, 00605 placer_opts.num_regions, fixed_pins, 00606 placer_opts.place_algorithm, 00607 placer_opts.timing_tradeoff, 00608 inverse_prev_bb_cost, 00609 inverse_prev_timing_cost, &delay_cost, 00610 x_lookup) == 1) 00611 { 00612 success_sum++; 00613 av_cost += cost; 00614 av_bb_cost += bb_cost; 00615 av_timing_cost += timing_cost; 00616 av_delay_cost += delay_cost; 00617 sum_of_squares += cost * cost; 00618 } 00619 00620 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE 00621 || placer_opts.place_algorithm == 00622 PATH_TIMING_DRIVEN_PLACE) 00623 { 00624 00625 if(inner_crit_iter_count >= inner_recompute_limit 00626 && inner_iter != move_lim - 1) 00627 { /*on last iteration don't recompute */ 00628 00629 inner_crit_iter_count = 0; 00630 #ifdef VERBOSE 00631 printf 00632 ("Inner Loop Recompute Criticalities\n"); 00633 #endif 00634 if(placer_opts.place_algorithm == 00635 NET_TIMING_DRIVEN_PLACE) 00636 { 00637 place_delay_value = 00638 delay_cost / num_connections; 00639 load_constant_net_delay(net_delay, 00640 place_delay_value); 00641 } 00642 00643 load_timing_graph_net_delays(net_delay); 00644 d_max = load_net_slack(net_slack, 0); 00645 load_criticalities(placer_opts, net_slack, 00646 d_max, crit_exponent); 00647 comp_td_costs(&timing_cost, &delay_cost); 00648 } 00649 inner_crit_iter_count++; 00650 } 00651 #ifdef VERBOSE 00652 printf 00653 ("t = %g cost = %g bb_cost = %g timing_cost = %g move = %d dmax = %g\n", 00654 t, cost, bb_cost, timing_cost, inner_iter, d_max); 00655 if(fabs 00656 (bb_cost - 00657 comp_bb_cost(CHECK, placer_opts.place_cost_type, 00658 placer_opts.num_regions)) > 00659 bb_cost * ERROR_TOL) 00660 exit(1); 00661 #endif 00662 } 00663 00664 /* Lines below prevent too much round-off error from accumulating * 00665 * in the cost over many iterations. This round-off can lead to * 00666 * error checks failing because the cost is different from what * 00667 * you get when you recompute from scratch. */ 00668 00669 moves_since_cost_recompute += move_lim; 00670 if(moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE) 00671 { 00672 new_bb_cost = 00673 recompute_bb_cost(placer_opts.place_cost_type, 00674 placer_opts.num_regions); 00675 if(fabs(new_bb_cost - bb_cost) > bb_cost * ERROR_TOL) 00676 { 00677 printf 00678 ("Error in try_place: new_bb_cost = %g, old bb_cost = %g.\n", 00679 new_bb_cost, bb_cost); 00680 exit(1); 00681 } 00682 bb_cost = new_bb_cost; 00683 00684 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE 00685 || placer_opts.place_algorithm == 00686 PATH_TIMING_DRIVEN_PLACE) 00687 { 00688 comp_td_costs(&new_timing_cost, &new_delay_cost); 00689 if(fabs(new_timing_cost - timing_cost) > 00690 timing_cost * ERROR_TOL) 00691 { 00692 printf 00693 ("Error in try_place: new_timing_cost = %g, old timing_cost = %g.\n", 00694 new_timing_cost, timing_cost); 00695 exit(1); 00696 } 00697 if(fabs(new_delay_cost - delay_cost) > 00698 delay_cost * ERROR_TOL) 00699 { 00700 printf 00701 ("Error in try_place: new_delay_cost = %g, old delay_cost = %g.\n", 00702 new_delay_cost, delay_cost); 00703 exit(1); 00704 } 00705 timing_cost = new_timing_cost; 00706 } 00707 00708 if(placer_opts.place_algorithm == BOUNDING_BOX_PLACE) 00709 { 00710 cost = new_bb_cost; 00711 } 00712 moves_since_cost_recompute = 0; 00713 } 00714 00715 tot_iter += move_lim; 00716 success_rat = ((float)success_sum) / move_lim; 00717 if(success_sum == 0) 00718 { 00719 av_cost = cost; 00720 av_bb_cost = bb_cost; 00721 av_timing_cost = timing_cost; 00722 av_delay_cost = delay_cost; 00723 } 00724 else 00725 { 00726 av_cost /= success_sum; 00727 av_bb_cost /= success_sum; 00728 av_timing_cost /= success_sum; 00729 av_delay_cost /= success_sum; 00730 } 00731 std_dev = get_std_dev(success_sum, sum_of_squares, av_cost); 00732 00733 #ifndef SPEC 00734 printf 00735 ("%11.5g %10.6g %11.6g %11.6g %11.6g %11.6g %11.4g %9.4g %8.3g %7.4g %7.4g %10d ", 00736 t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost, 00737 place_delay_value, d_max, success_rat, std_dev, rlim, 00738 crit_exponent, tot_iter); 00739 #endif 00740 00741 oldt = t; /* for finding and printing alpha. */ 00742 update_t(&t, std_dev, rlim, success_rat, annealing_sched); 00743 00744 #ifndef SPEC 00745 printf("%7.4g\n", t / oldt); 00746 #endif 00747 00748 sprintf(msg, 00749 "Cost: %g BB Cost %g TD Cost %g Temperature: %g d_max: %g", 00750 cost, bb_cost, timing_cost, t, d_max); 00751 update_screen(MINOR, msg, PLACEMENT, FALSE); 00752 update_rlim(&rlim, success_rat); 00753 00754 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00755 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) 00756 { 00757 crit_exponent = 00758 (1 - 00759 (rlim - 00760 final_rlim) * inverse_delta_rlim) * 00761 (placer_opts.td_place_exp_last - 00762 placer_opts.td_place_exp_first) + 00763 placer_opts.td_place_exp_first; 00764 } 00765 #ifdef VERBOSE 00766 dump_clbs(); 00767 #endif 00768 } 00769 00770 t = 0; /* freeze out */ 00771 av_cost = 0.; 00772 av_bb_cost = 0.; 00773 av_timing_cost = 0.; 00774 sum_of_squares = 0.; 00775 av_delay_cost = 0.; 00776 success_sum = 0; 00777 00778 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00779 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) 00780 { 00781 /*at each temperature change we update these values to be used */ 00782 /*for normalizing the tradeoff between timing and wirelength (bb) */ 00783 if(outer_crit_iter_count >= placer_opts.recompute_crit_iter || 00784 placer_opts.inner_loop_recompute_divider != 0) 00785 { 00786 00787 #ifdef VERBOSE 00788 printf("Outer Loop Recompute Criticalities\n"); 00789 #endif 00790 place_delay_value = delay_cost / num_connections; 00791 00792 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) 00793 load_constant_net_delay(net_delay, place_delay_value); 00794 00795 load_timing_graph_net_delays(net_delay); 00796 d_max = load_net_slack(net_slack, 0); 00797 load_criticalities(placer_opts, net_slack, d_max, 00798 crit_exponent); 00799 /*recompute criticaliies */ 00800 comp_td_costs(&timing_cost, &delay_cost); 00801 outer_crit_iter_count = 0; 00802 } 00803 outer_crit_iter_count++; 00804 00805 inverse_prev_bb_cost = 1 / (bb_cost); 00806 inverse_prev_timing_cost = 1 / (timing_cost); 00807 } 00808 00809 inner_crit_iter_count = 1; 00810 00811 for(inner_iter = 0; inner_iter < move_lim; inner_iter++) 00812 { 00813 if(try_swap(t, &cost, &bb_cost, &timing_cost, 00814 rlim, placer_opts.place_cost_type, 00815 old_region_occ_x, old_region_occ_y, 00816 placer_opts.num_regions, fixed_pins, 00817 placer_opts.place_algorithm, 00818 placer_opts.timing_tradeoff, inverse_prev_bb_cost, 00819 inverse_prev_timing_cost, &delay_cost, x_lookup) == 1) 00820 { 00821 success_sum++; 00822 av_cost += cost; 00823 av_bb_cost += bb_cost; 00824 av_delay_cost += delay_cost; 00825 av_timing_cost += timing_cost; 00826 sum_of_squares += cost * cost; 00827 00828 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE 00829 || placer_opts.place_algorithm == 00830 PATH_TIMING_DRIVEN_PLACE) 00831 { 00832 00833 if(inner_crit_iter_count >= inner_recompute_limit 00834 && inner_iter != move_lim - 1) 00835 { 00836 00837 inner_crit_iter_count = 0; 00838 #ifdef VERBOSE 00839 printf 00840 ("Inner Loop Recompute Criticalities\n"); 00841 #endif 00842 if(placer_opts.place_algorithm == 00843 NET_TIMING_DRIVEN_PLACE) 00844 { 00845 place_delay_value = 00846 delay_cost / num_connections; 00847 load_constant_net_delay(net_delay, 00848 place_delay_value); 00849 } 00850 00851 load_timing_graph_net_delays(net_delay); 00852 d_max = load_net_slack(net_slack, 0); 00853 load_criticalities(placer_opts, net_slack, 00854 d_max, crit_exponent); 00855 comp_td_costs(&timing_cost, &delay_cost); 00856 } 00857 inner_crit_iter_count++; 00858 } 00859 } 00860 #ifdef VERBOSE 00861 printf("t = %g cost = %g move = %d\n", t, cost, tot_iter); 00862 #endif 00863 } 00864 tot_iter += move_lim; 00865 success_rat = ((float)success_sum) / move_lim; 00866 if(success_sum == 0) 00867 { 00868 av_cost = cost; 00869 av_bb_cost = bb_cost; 00870 av_delay_cost = delay_cost; 00871 av_timing_cost = timing_cost; 00872 } 00873 else 00874 { 00875 av_cost /= success_sum; 00876 av_bb_cost /= success_sum; 00877 av_delay_cost /= success_sum; 00878 av_timing_cost /= success_sum; 00879 } 00880 00881 std_dev = get_std_dev(success_sum, sum_of_squares, av_cost); 00882 00883 00884 #ifndef SPEC 00885 printf 00886 ("%11.5g %10.6g %11.6g %11.6g %11.6g %11.6g %11.4g %9.4g %8.3g %7.4g %7.4g %10d \n\n", 00887 t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost, 00888 place_delay_value, d_max, success_rat, std_dev, rlim, 00889 crit_exponent, tot_iter); 00890 00891 #endif 00892 00893 #ifdef VERBOSE 00894 dump_clbs(); 00895 #endif 00896 00897 check_place(bb_cost, timing_cost, placer_opts.place_cost_type, 00898 placer_opts.num_regions, placer_opts.place_algorithm, 00899 delay_cost); 00900 00901 if(placer_opts.enable_timing_computations && 00902 placer_opts.place_algorithm == BOUNDING_BOX_PLACE) 00903 { 00904 /*need this done since the timing data has not been kept up to date* 00905 *in bounding_box mode */ 00906 for(inet = 0; inet < num_nets; inet++) 00907 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++) 00908 timing_place_crit[inet][ipin] = 0; /*dummy crit values */ 00909 comp_td_costs(&timing_cost, &delay_cost); /*computes point_to_point_delay_cost */ 00910 } 00911 00912 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00913 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || 00914 placer_opts.enable_timing_computations) 00915 { 00916 net_delay = point_to_point_delay_cost; /*this makes net_delay up to date with * 00917 *the same values that the placer is using*/ 00918 load_timing_graph_net_delays(net_delay); 00919 est_crit = load_net_slack(net_slack, 0); 00920 #ifdef CREATE_ECHO_FILES 00921 /* print_sink_delays("placement_sink_delays.echo"); */ 00922 print_net_slack("placement_net_slacks.echo", net_slack); 00923 print_critical_path("placement_crit_path.echo", 00924 *subblock_data_ptr); 00925 #endif /* CREATE_ECHO_FILES */ 00926 printf("Placement Estimated Crit Path Delay: %g\n\n", est_crit); 00927 } 00928 00929 00930 sprintf(msg, 00931 "Placement. Cost: %g bb_cost: %g td_cost: %g Channel Factor: %d d_max: %g", 00932 cost, bb_cost, timing_cost, width_fac, d_max); 00933 printf 00934 ("Placement. Cost: %g bb_cost: %g td_cost: %g delay_cost: %g.\n", 00935 cost, bb_cost, timing_cost, delay_cost); 00936 update_screen(MAJOR, msg, PLACEMENT, FALSE); 00937 00938 #ifdef SPEC 00939 printf("Total moves attempted: %d.0\n", tot_iter); 00940 #endif 00941 00942 if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || 00943 placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || 00944 placer_opts.enable_timing_computations) 00945 { 00946 00947 net_delay = remember_net_delay_original_ptr; 00948 00949 free_placement_structs(placer_opts.place_cost_type, 00950 placer_opts.num_regions, old_region_occ_x, 00951 old_region_occ_y, placer_opts); 00952 free_lookups_and_criticalities(&net_delay, &net_slack); 00953 } 00954 00955 /* placement is done - find mst of all nets. 00956 * creating mst for each net; this gives me an ordering of sinks 00957 * by which I will direct search (A*) for. */ 00958 if(*mst) 00959 { 00960 for(inet = 0; inet < num_nets; inet++) 00961 { 00962 assert((*mst)[inet]); 00963 free((*mst)[inet]); 00964 } 00965 free(*mst); 00966 } 00967 *mst = (t_mst_edge **) my_malloc(sizeof(t_mst_edge *) * num_nets); 00968 for(inet = 0; inet < num_nets; inet++) 00969 { 00970 (*mst)[inet] = get_mst_of_net(inet); 00971 } 00972 free(x_lookup); 00973 }
static int try_swap | ( | float | t, | |
float * | cost, | |||
float * | bb_cost, | |||
float * | timing_cost, | |||
float | rlim, | |||
int | place_cost_type, | |||
float ** | old_region_occ_x, | |||
float ** | old_region_occ_y, | |||
int | num_regions, | |||
boolean | fixed_pins, | |||
enum e_place_algorithm | place_algorithm, | |||
float | timing_tradeoff, | |||
float | inverse_prev_bb_cost, | |||
float | inverse_prev_timing_cost, | |||
float * | delay_cost, | |||
int * | x_lookup | |||
) | [static] |
Definition at line 1246 of file place.c.
01262 { 01263 01264 /* Picks some block and moves it to another spot. If this spot is * 01265 * occupied, switch the blocks. Assess the change in cost function * 01266 * and accept or reject the move. If rejected, return 0. If * 01267 * accepted return 1. Pass back the new value of the cost function. * 01268 * rlim is the range limiter. */ 01269 01270 int b_from, x_to, y_to, z_to, x_from, y_from, z_from, b_to; 01271 int i, k, inet, keep_switch, num_of_pins, max_pins_per_fb; 01272 int num_nets_affected, bb_index; 01273 float delta_c, bb_delta_c, timing_delta_c, delay_delta_c, newcost; 01274 static struct s_bb *bb_coord_new = NULL; 01275 static struct s_bb *bb_edge_new = NULL; 01276 static int *nets_to_update = NULL, *net_block_moved = NULL; 01277 01278 max_pins_per_fb = 0; 01279 for(i = 0; i < num_types; i++) 01280 { 01281 max_pins_per_fb = 01282 max(max_pins_per_fb, type_descriptors[i].num_pins); 01283 } 01284 01285 /* Allocate the local bb_coordinate storage, etc. only once. */ 01286 01287 if(bb_coord_new == NULL) 01288 { 01289 bb_coord_new = (struct s_bb *)my_malloc(2 * max_pins_per_fb * 01290 sizeof(struct s_bb)); 01291 bb_edge_new = (struct s_bb *)my_malloc(2 * max_pins_per_fb * 01292 sizeof(struct s_bb)); 01293 nets_to_update = 01294 (int *)my_malloc(2 * max_pins_per_fb * sizeof(int)); 01295 net_block_moved = 01296 (int *)my_malloc(2 * max_pins_per_fb * sizeof(int)); 01297 } 01298 01299 01300 delay_delta_c = 0.0; 01301 b_from = my_irand(num_blocks - 1); 01302 01303 /* If the pins are fixed we never move them from their initial * 01304 * random locations. The code below could be made more efficient * 01305 * by using the fact that pins appear first in the block list, * 01306 * but this shouldn't cause any significant slowdown and won't be * 01307 * broken if I ever change the parser so that the pins aren't * 01308 * necessarily at the start of the block list. */ 01309 01310 if(fixed_pins == TRUE) 01311 { 01312 while(block[b_from].type == IO_TYPE) 01313 { 01314 b_from = my_irand(num_blocks - 1); 01315 } 01316 } 01317 01318 x_from = block[b_from].x; 01319 y_from = block[b_from].y; 01320 z_from = block[b_from].z; 01321 01322 if(!find_to 01323 (x_from, y_from, block[b_from].type, rlim, x_lookup, &x_to, &y_to)) 01324 return FALSE; 01325 01326 /* Make the switch in order to make computing the new bounding * 01327 * box simpler. If the cost increase is too high, switch them * 01328 * back. (block data structures switched, clbs not switched * 01329 * until success of move is determined.) */ 01330 01331 z_to = 0; 01332 if(grid[x_to][y_to].type->capacity > 1) 01333 { 01334 z_to = my_irand(grid[x_to][y_to].type->capacity - 1); 01335 } 01336 if(grid[x_to][y_to].blocks[z_to] == EMPTY) 01337 { /* Moving to an empty location */ 01338 b_to = EMPTY; 01339 block[b_from].x = x_to; 01340 block[b_from].y = y_to; 01341 block[b_from].z = z_to; 01342 } 01343 else 01344 { /* Swapping two blocks */ 01345 b_to = grid[x_to][y_to].blocks[z_to]; 01346 block[b_to].x = x_from; 01347 block[b_to].y = y_from; 01348 block[b_to].z = z_from; 01349 01350 block[b_from].x = x_to; 01351 block[b_from].y = y_to; 01352 block[b_from].z = z_to; 01353 } 01354 01355 /* Now update the cost function. May have to do major optimizations * 01356 * here later. */ 01357 01358 /* I'm using negative values of temp_net_cost as a flag, so DO NOT * 01359 * use cost functions that can go negative. */ 01360 01361 delta_c = 0; /* Change in cost due to this swap. */ 01362 bb_delta_c = 0; 01363 timing_delta_c = 0; 01364 01365 num_of_pins = block[b_from].type->num_pins; 01366 01367 num_nets_affected = find_affected_nets(nets_to_update, net_block_moved, 01368 b_from, b_to, num_of_pins); 01369 01370 if(place_cost_type == NONLINEAR_CONG) 01371 { 01372 save_region_occ(old_region_occ_x, old_region_occ_y, num_regions); 01373 } 01374 01375 bb_index = 0; /* Index of new bounding box. */ 01376 01377 for(k = 0; k < num_nets_affected; k++) 01378 { 01379 inet = nets_to_update[k]; 01380 01381 /* If we swapped two blocks connected to the same net, its bounding box * 01382 * doesn't change. */ 01383 01384 if(net_block_moved[k] == FROM_AND_TO) 01385 continue; 01386 01387 if(net[inet].num_sinks < SMALL_NET) 01388 { 01389 get_non_updateable_bb(inet, &bb_coord_new[bb_index]); 01390 } 01391 else 01392 { 01393 if(net_block_moved[k] == FROM) 01394 update_bb(inet, &bb_coord_new[bb_index], 01395 &bb_edge_new[bb_index], x_from, y_from, 01396 x_to, y_to); 01397 else 01398 update_bb(inet, &bb_coord_new[bb_index], 01399 &bb_edge_new[bb_index], x_to, y_to, x_from, 01400 y_from); 01401 } 01402 01403 if(place_cost_type != NONLINEAR_CONG) 01404 { 01405 temp_net_cost[inet] = 01406 get_net_cost(inet, &bb_coord_new[bb_index]); 01407 bb_delta_c += temp_net_cost[inet] - net_cost[inet]; 01408 } 01409 else 01410 { 01411 /* Rip up, then replace with new bb. */ 01412 update_region_occ(inet, &bb_coords[inet], -1, 01413 num_regions); 01414 update_region_occ(inet, &bb_coord_new[bb_index], 1, 01415 num_regions); 01416 } 01417 01418 bb_index++; 01419 } 01420 01421 if(place_cost_type == NONLINEAR_CONG) 01422 { 01423 newcost = nonlinear_cong_cost(num_regions); 01424 bb_delta_c = newcost - *bb_cost; 01425 } 01426 01427 01428 if(place_algorithm == NET_TIMING_DRIVEN_PLACE || 01429 place_algorithm == PATH_TIMING_DRIVEN_PLACE) 01430 { 01431 /*in this case we redefine delta_c as a combination of timing and bb. * 01432 *additionally, we normalize all values, therefore delta_c is in * 01433 *relation to 1*/ 01434 01435 comp_delta_td_cost(b_from, b_to, num_of_pins, &timing_delta_c, 01436 &delay_delta_c); 01437 01438 delta_c = 01439 (1 - timing_tradeoff) * bb_delta_c * inverse_prev_bb_cost + 01440 timing_tradeoff * timing_delta_c * inverse_prev_timing_cost; 01441 } 01442 else 01443 { 01444 delta_c = bb_delta_c; 01445 } 01446 01447 01448 keep_switch = assess_swap(delta_c, t); 01449 01450 /* 1 -> move accepted, 0 -> rejected. */ 01451 01452 if(keep_switch) 01453 { 01454 *cost = *cost + delta_c; 01455 *bb_cost = *bb_cost + bb_delta_c; 01456 01457 01458 if(place_algorithm == NET_TIMING_DRIVEN_PLACE || 01459 place_algorithm == PATH_TIMING_DRIVEN_PLACE) 01460 { 01461 /*update the point_to_point_timing_cost and point_to_point_delay_cost 01462 * values from the temporary values */ 01463 *timing_cost = *timing_cost + timing_delta_c; 01464 *delay_cost = *delay_cost + delay_delta_c; 01465 01466 update_td_cost(b_from, b_to, num_of_pins); 01467 } 01468 01469 /* update net cost functions and reset flags. */ 01470 01471 bb_index = 0; 01472 01473 for(k = 0; k < num_nets_affected; k++) 01474 { 01475 inet = nets_to_update[k]; 01476 01477 /* If we swapped two blocks connected to the same net, its bounding box * 01478 * doesn't change. */ 01479 01480 if(net_block_moved[k] == FROM_AND_TO) 01481 { 01482 temp_net_cost[inet] = -1; 01483 continue; 01484 } 01485 01486 bb_coords[inet] = bb_coord_new[bb_index]; 01487 if(net[inet].num_sinks >= SMALL_NET) 01488 bb_num_on_edges[inet] = bb_edge_new[bb_index]; 01489 01490 bb_index++; 01491 01492 net_cost[inet] = temp_net_cost[inet]; 01493 temp_net_cost[inet] = -1; 01494 } 01495 01496 /* Update fb data structures since we kept the move. */ 01497 /* Swap physical location */ 01498 grid[x_to][y_to].blocks[z_to] = b_from; 01499 grid[x_from][y_from].blocks[z_from] = b_to; 01500 01501 if(EMPTY == b_to) 01502 { /* Moved to an empty location */ 01503 grid[x_to][y_to].usage++; 01504 grid[x_from][y_from].usage--; 01505 } 01506 } 01507 01508 else 01509 { /* Move was rejected. */ 01510 01511 /* Reset the net cost function flags first. */ 01512 for(k = 0; k < num_nets_affected; k++) 01513 { 01514 inet = nets_to_update[k]; 01515 temp_net_cost[inet] = -1; 01516 } 01517 01518 /* Restore the block data structures to their state before the move. */ 01519 block[b_from].x = x_from; 01520 block[b_from].y = y_from; 01521 block[b_from].z = z_from; 01522 if(b_to != EMPTY) 01523 { 01524 block[b_to].x = x_to; 01525 block[b_to].y = y_to; 01526 block[b_to].z = z_to; 01527 } 01528 01529 /* Restore the region occupancies to their state before the move. */ 01530 if(place_cost_type == NONLINEAR_CONG) 01531 { 01532 restore_region_occ(old_region_occ_x, old_region_occ_y, 01533 num_regions); 01534 } 01535 } 01536 01537 return (keep_switch); 01538 }
static void update_bb | ( | int | inet, | |
struct s_bb * | bb_coord_new, | |||
struct s_bb * | bb_edge_new, | |||
int | xold, | |||
int | yold, | |||
int | xnew, | |||
int | ynew | |||
) | [static] |
Definition at line 3339 of file place.c.
03346 { 03347 03348 /* Updates the bounding box of a net by storing its coordinates in * 03349 * the bb_coord_new data structure and the number of blocks on each * 03350 * edge in the bb_edge_new data structure. This routine should only * 03351 * be called for large nets, since it has some overhead relative to * 03352 * just doing a brute force bounding box calculation. The bounding * 03353 * box coordinate and edge information for inet must be valid before * 03354 * this routine is called. * 03355 * Currently assumes channels on both sides of the CLBs forming the * 03356 * edges of the bounding box can be used. Essentially, I am assuming * 03357 * the pins always lie on the outside of the bounding box. */ 03358 03359 /* IO blocks are considered to be one cell in for simplicity. */ 03360 03361 xnew = max(min(xnew, nx), 1); 03362 ynew = max(min(ynew, ny), 1); 03363 xold = max(min(xold, nx), 1); 03364 yold = max(min(yold, ny), 1); 03365 03366 /* Check if I can update the bounding box incrementally. */ 03367 03368 if(xnew < xold) 03369 { /* Move to left. */ 03370 03371 /* Update the xmax fields for coordinates and number of edges first. */ 03372 03373 if(xold == bb_coords[inet].xmax) 03374 { /* Old position at xmax. */ 03375 if(bb_num_on_edges[inet].xmax == 1) 03376 { 03377 get_bb_from_scratch(inet, bb_coord_new, 03378 bb_edge_new); 03379 return; 03380 } 03381 else 03382 { 03383 bb_edge_new->xmax = 03384 bb_num_on_edges[inet].xmax - 1; 03385 bb_coord_new->xmax = bb_coords[inet].xmax; 03386 } 03387 } 03388 03389 else 03390 { /* Move to left, old postion was not at xmax. */ 03391 bb_coord_new->xmax = bb_coords[inet].xmax; 03392 bb_edge_new->xmax = bb_num_on_edges[inet].xmax; 03393 } 03394 03395 /* Now do the xmin fields for coordinates and number of edges. */ 03396 03397 if(xnew < bb_coords[inet].xmin) 03398 { /* Moved past xmin */ 03399 bb_coord_new->xmin = xnew; 03400 bb_edge_new->xmin = 1; 03401 } 03402 03403 else if(xnew == bb_coords[inet].xmin) 03404 { /* Moved to xmin */ 03405 bb_coord_new->xmin = xnew; 03406 bb_edge_new->xmin = bb_num_on_edges[inet].xmin + 1; 03407 } 03408 03409 else 03410 { /* Xmin unchanged. */ 03411 bb_coord_new->xmin = bb_coords[inet].xmin; 03412 bb_edge_new->xmin = bb_num_on_edges[inet].xmin; 03413 } 03414 } 03415 03416 /* End of move to left case. */ 03417 else if(xnew > xold) 03418 { /* Move to right. */ 03419 03420 /* Update the xmin fields for coordinates and number of edges first. */ 03421 03422 if(xold == bb_coords[inet].xmin) 03423 { /* Old position at xmin. */ 03424 if(bb_num_on_edges[inet].xmin == 1) 03425 { 03426 get_bb_from_scratch(inet, bb_coord_new, 03427 bb_edge_new); 03428 return; 03429 } 03430 else 03431 { 03432 bb_edge_new->xmin = 03433 bb_num_on_edges[inet].xmin - 1; 03434 bb_coord_new->xmin = bb_coords[inet].xmin; 03435 } 03436 } 03437 03438 else 03439 { /* Move to right, old position was not at xmin. */ 03440 bb_coord_new->xmin = bb_coords[inet].xmin; 03441 bb_edge_new->xmin = bb_num_on_edges[inet].xmin; 03442 } 03443 03444 /* Now do the xmax fields for coordinates and number of edges. */ 03445 03446 if(xnew > bb_coords[inet].xmax) 03447 { /* Moved past xmax. */ 03448 bb_coord_new->xmax = xnew; 03449 bb_edge_new->xmax = 1; 03450 } 03451 03452 else if(xnew == bb_coords[inet].xmax) 03453 { /* Moved to xmax */ 03454 bb_coord_new->xmax = xnew; 03455 bb_edge_new->xmax = bb_num_on_edges[inet].xmax + 1; 03456 } 03457 03458 else 03459 { /* Xmax unchanged. */ 03460 bb_coord_new->xmax = bb_coords[inet].xmax; 03461 bb_edge_new->xmax = bb_num_on_edges[inet].xmax; 03462 } 03463 } 03464 /* End of move to right case. */ 03465 else 03466 { /* xnew == xold -- no x motion. */ 03467 bb_coord_new->xmin = bb_coords[inet].xmin; 03468 bb_coord_new->xmax = bb_coords[inet].xmax; 03469 bb_edge_new->xmin = bb_num_on_edges[inet].xmin; 03470 bb_edge_new->xmax = bb_num_on_edges[inet].xmax; 03471 } 03472 03473 /* Now account for the y-direction motion. */ 03474 03475 if(ynew < yold) 03476 { /* Move down. */ 03477 03478 /* Update the ymax fields for coordinates and number of edges first. */ 03479 03480 if(yold == bb_coords[inet].ymax) 03481 { /* Old position at ymax. */ 03482 if(bb_num_on_edges[inet].ymax == 1) 03483 { 03484 get_bb_from_scratch(inet, bb_coord_new, 03485 bb_edge_new); 03486 return; 03487 } 03488 else 03489 { 03490 bb_edge_new->ymax = 03491 bb_num_on_edges[inet].ymax - 1; 03492 bb_coord_new->ymax = bb_coords[inet].ymax; 03493 } 03494 } 03495 03496 else 03497 { /* Move down, old postion was not at ymax. */ 03498 bb_coord_new->ymax = bb_coords[inet].ymax; 03499 bb_edge_new->ymax = bb_num_on_edges[inet].ymax; 03500 } 03501 03502 /* Now do the ymin fields for coordinates and number of edges. */ 03503 03504 if(ynew < bb_coords[inet].ymin) 03505 { /* Moved past ymin */ 03506 bb_coord_new->ymin = ynew; 03507 bb_edge_new->ymin = 1; 03508 } 03509 03510 else if(ynew == bb_coords[inet].ymin) 03511 { /* Moved to ymin */ 03512 bb_coord_new->ymin = ynew; 03513 bb_edge_new->ymin = bb_num_on_edges[inet].ymin + 1; 03514 } 03515 03516 else 03517 { /* ymin unchanged. */ 03518 bb_coord_new->ymin = bb_coords[inet].ymin; 03519 bb_edge_new->ymin = bb_num_on_edges[inet].ymin; 03520 } 03521 } 03522 /* End of move down case. */ 03523 else if(ynew > yold) 03524 { /* Moved up. */ 03525 03526 /* Update the ymin fields for coordinates and number of edges first. */ 03527 03528 if(yold == bb_coords[inet].ymin) 03529 { /* Old position at ymin. */ 03530 if(bb_num_on_edges[inet].ymin == 1) 03531 { 03532 get_bb_from_scratch(inet, bb_coord_new, 03533 bb_edge_new); 03534 return; 03535 } 03536 else 03537 { 03538 bb_edge_new->ymin = 03539 bb_num_on_edges[inet].ymin - 1; 03540 bb_coord_new->ymin = bb_coords[inet].ymin; 03541 } 03542 } 03543 03544 else 03545 { /* Moved up, old position was not at ymin. */ 03546 bb_coord_new->ymin = bb_coords[inet].ymin; 03547 bb_edge_new->ymin = bb_num_on_edges[inet].ymin; 03548 } 03549 03550 /* Now do the ymax fields for coordinates and number of edges. */ 03551 03552 if(ynew > bb_coords[inet].ymax) 03553 { /* Moved past ymax. */ 03554 bb_coord_new->ymax = ynew; 03555 bb_edge_new->ymax = 1; 03556 } 03557 03558 else if(ynew == bb_coords[inet].ymax) 03559 { /* Moved to ymax */ 03560 bb_coord_new->ymax = ynew; 03561 bb_edge_new->ymax = bb_num_on_edges[inet].ymax + 1; 03562 } 03563 03564 else 03565 { /* ymax unchanged. */ 03566 bb_coord_new->ymax = bb_coords[inet].ymax; 03567 bb_edge_new->ymax = bb_num_on_edges[inet].ymax; 03568 } 03569 } 03570 /* End of move up case. */ 03571 else 03572 { /* ynew == yold -- no y motion. */ 03573 bb_coord_new->ymin = bb_coords[inet].ymin; 03574 bb_coord_new->ymax = bb_coords[inet].ymax; 03575 bb_edge_new->ymin = bb_num_on_edges[inet].ymin; 03576 bb_edge_new->ymax = bb_num_on_edges[inet].ymax; 03577 } 03578 }
static void update_region_occ | ( | int | inet, | |
struct s_bb * | coords, | |||
int | add_or_sub, | |||
int | num_regions | |||
) | [static] |
Definition at line 2509 of file place.c.
02513 { 02514 02515 /* Called only when the place_cost_type is NONLINEAR_CONG. If add_or_sub * 02516 * is 1, this uses the new net bounding box to increase the occupancy * 02517 * of some regions. If add_or_sub = - 1, it decreases the occupancy * 02518 * by that due to this bounding box. */ 02519 02520 float net_xmin, net_xmax, net_ymin, net_ymax, crossing; 02521 float inv_region_len, inv_region_height; 02522 float inv_bb_len, inv_bb_height; 02523 float overlap_xlow, overlap_xhigh, overlap_ylow, overlap_yhigh; 02524 float y_overlap, x_overlap, x_occupancy, y_occupancy; 02525 int imin, imax, jmin, jmax, i, j; 02526 02527 if(net[inet].num_sinks >= 50) 02528 { 02529 crossing = 2.7933 + 0.02616 * ((net[inet].num_sinks + 1) - 50); 02530 } 02531 else 02532 { 02533 crossing = cross_count[net[inet].num_sinks]; 02534 } 02535 02536 net_xmin = coords->xmin - 0.5; 02537 net_xmax = coords->xmax + 0.5; 02538 net_ymin = coords->ymin - 0.5; 02539 net_ymax = coords->ymax + 0.5; 02540 02541 /* I could precompute the two values below. Should consider this. */ 02542 02543 inv_region_len = (float)num_regions / (float)nx; 02544 inv_region_height = (float)num_regions / (float)ny; 02545 02546 /* Get integer coordinates defining the rectangular area in which the * 02547 * subregions have to be updated. Formula is as follows: subtract * 02548 * 0.5 from net_xmin, etc. to get numbers from 0 to nx or ny; * 02549 * divide by nx or ny to scale between 0 and 1; multiply by * 02550 * num_regions to scale between 0 and num_regions; and truncate to * 02551 * get the final answer. */ 02552 02553 imin = (int)(net_xmin - 0.5) * inv_region_len; 02554 imax = (int)(net_xmax - 0.5) * inv_region_len; 02555 imax = min(imax, num_regions - 1); /* Watch for weird roundoff */ 02556 02557 jmin = (int)(net_ymin - 0.5) * inv_region_height; 02558 jmax = (int)(net_ymax - 0.5) * inv_region_height; 02559 jmax = min(jmax, num_regions - 1); /* Watch for weird roundoff */ 02560 02561 inv_bb_len = 1. / (net_xmax - net_xmin); 02562 inv_bb_height = 1. / (net_ymax - net_ymin); 02563 02564 /* See RISA paper (ICCAD '94, pp. 690 - 695) for a description of why * 02565 * I use exactly this cost function. */ 02566 02567 for(i = imin; i <= imax; i++) 02568 { 02569 for(j = jmin; j <= jmax; j++) 02570 { 02571 overlap_xlow = max(place_region_bounds_x[i], net_xmin); 02572 overlap_xhigh = 02573 min(place_region_bounds_x[i + 1], net_xmax); 02574 overlap_ylow = max(place_region_bounds_y[j], net_ymin); 02575 overlap_yhigh = 02576 min(place_region_bounds_y[j + 1], net_ymax); 02577 02578 x_overlap = overlap_xhigh - overlap_xlow; 02579 y_overlap = overlap_yhigh - overlap_ylow; 02580 02581 #ifdef DEBUG 02582 02583 if(x_overlap < -0.001) 02584 { 02585 printf 02586 ("Error in update_region_occ: x_overlap < 0" 02587 "\n inet = %d, overlap = %g\n", inet, 02588 x_overlap); 02589 } 02590 02591 if(y_overlap < -0.001) 02592 { 02593 printf 02594 ("Error in update_region_occ: y_overlap < 0" 02595 "\n inet = %d, overlap = %g\n", inet, 02596 y_overlap); 02597 } 02598 #endif 02599 02600 02601 x_occupancy = 02602 crossing * y_overlap * x_overlap * inv_bb_height * 02603 inv_region_len; 02604 y_occupancy = 02605 crossing * x_overlap * y_overlap * inv_bb_len * 02606 inv_region_height; 02607 02608 place_region_x[i][j].occupancy += 02609 add_or_sub * x_occupancy; 02610 place_region_y[i][j].occupancy += 02611 add_or_sub * y_occupancy; 02612 } 02613 } 02614 02615 }
static void update_rlim | ( | float * | rlim, | |
float | success_rat | |||
) | [static] |
Definition at line 1058 of file place.c.
01060 { 01061 01062 /* Update the range limited to keep acceptance prob. near 0.44. Use * 01063 * a floating point rlim to allow gradual transitions at low temps. */ 01064 01065 float upper_lim; 01066 01067 *rlim = (*rlim) * (1. - 0.44 + success_rat); 01068 upper_lim = max(nx, ny); 01069 *rlim = min(*rlim, upper_lim); 01070 *rlim = max(*rlim, 1.); 01071 }
static void update_t | ( | float * | t, | |
float | std_dev, | |||
float | rlim, | |||
float | success_rat, | |||
struct s_annealing_sched | annealing_sched | |||
) | [static] |
Definition at line 1076 of file place.c.
01081 { 01082 01083 /* float fac; */ 01084 01085 if(annealing_sched.type == USER_SCHED) 01086 { 01087 *t = annealing_sched.alpha_t * (*t); 01088 } 01089 01090 /* Old standard deviation based stuff is below. This bogs down horribly 01091 * for big circuits (alu4 and especially bigkey_mod). */ 01092 /* #define LAMBDA .7 */ 01093 /* ------------------------------------ */ 01094 #if 0 01095 else if(std_dev == 0.) 01096 { 01097 *t = 0.; 01098 } 01099 else 01100 { 01101 fac = exp(-LAMBDA * (*t) / std_dev); 01102 fac = max(0.5, fac); 01103 *t = (*t) * fac; 01104 } 01105 #endif 01106 /* ------------------------------------- */ 01107 01108 else 01109 { /* AUTO_SCHED */ 01110 if(success_rat > 0.96) 01111 { 01112 *t = (*t) * 0.5; 01113 } 01114 else if(success_rat > 0.8) 01115 { 01116 *t = (*t) * 0.9; 01117 } 01118 else if(success_rat > 0.15 || rlim > 1.) 01119 { 01120 *t = (*t) * 0.95; 01121 } 01122 else 01123 { 01124 *t = (*t) * 0.8; 01125 } 01126 } 01127 }
static void update_td_cost | ( | int | b_from, | |
int | b_to, | |||
int | num_of_pins | |||
) | [static] |
Definition at line 2028 of file place.c.
02031 { 02032 /*update the point_to_point_timing_cost values from the temporary */ 02033 /*values for all connections that have changed */ 02034 02035 int blkpin, net_pin, inet, ipin; 02036 02037 for(blkpin = 0; blkpin < num_of_pins; blkpin++) 02038 { 02039 02040 inet = block[b_from].nets[blkpin]; 02041 02042 if(inet == OPEN) 02043 continue; 02044 02045 if(net[inet].is_global) 02046 continue; 02047 02048 net_pin = net_pin_index[b_from][blkpin]; 02049 02050 if(net_pin != 0) 02051 { 02052 02053 /*the following "if" prevents the value from being updated twice */ 02054 if(net[inet].node_block[0] != b_to 02055 && net[inet].node_block[0] != b_from) 02056 { 02057 02058 point_to_point_delay_cost[inet][net_pin] = 02059 temp_point_to_point_delay_cost[inet][net_pin]; 02060 temp_point_to_point_delay_cost[inet][net_pin] = 02061 -1; 02062 02063 point_to_point_timing_cost[inet][net_pin] = 02064 temp_point_to_point_timing_cost[inet] 02065 [net_pin]; 02066 temp_point_to_point_timing_cost[inet][net_pin] = 02067 -1; 02068 } 02069 } 02070 else 02071 { /*this net is being driven by a moved block, recompute */ 02072 /*all point to point connections on this net. */ 02073 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++) 02074 { 02075 02076 point_to_point_delay_cost[inet][ipin] = 02077 temp_point_to_point_delay_cost[inet][ipin]; 02078 temp_point_to_point_delay_cost[inet][ipin] = -1; 02079 02080 point_to_point_timing_cost[inet][ipin] = 02081 temp_point_to_point_timing_cost[inet][ipin]; 02082 temp_point_to_point_timing_cost[inet][ipin] = -1; 02083 } 02084 } 02085 } 02086 02087 if(b_to != EMPTY) 02088 { 02089 for(blkpin = 0; blkpin < num_of_pins; blkpin++) 02090 { 02091 02092 inet = block[b_to].nets[blkpin]; 02093 02094 if(inet == OPEN) 02095 continue; 02096 02097 if(net[inet].is_global) 02098 continue; 02099 02100 net_pin = net_pin_index[b_to][blkpin]; 02101 02102 if(net_pin != 0) 02103 { 02104 02105 /*the following "if" prevents the value from being updated 2x */ 02106 if(net[inet].node_block[0] != b_to 02107 && net[inet].node_block[0] != b_from) 02108 { 02109 02110 point_to_point_delay_cost[inet][net_pin] = 02111 temp_point_to_point_delay_cost[inet] 02112 [net_pin]; 02113 temp_point_to_point_delay_cost[inet] 02114 [net_pin] = -1; 02115 02116 point_to_point_timing_cost[inet][net_pin] 02117 = 02118 temp_point_to_point_timing_cost[inet] 02119 [net_pin]; 02120 temp_point_to_point_timing_cost[inet] 02121 [net_pin] = -1; 02122 } 02123 } 02124 else 02125 { /*this net is being driven by a moved block, recompute */ 02126 /*all point to point connections on this net. */ 02127 for(ipin = 1; ipin <= net[inet].num_sinks; ipin++) 02128 { 02129 02130 point_to_point_delay_cost[inet][ipin] = 02131 temp_point_to_point_delay_cost[inet] 02132 [ipin]; 02133 temp_point_to_point_delay_cost[inet][ipin] 02134 = -1; 02135 02136 point_to_point_timing_cost[inet][ipin] = 02137 temp_point_to_point_timing_cost[inet] 02138 [ipin]; 02139 temp_point_to_point_timing_cost[inet] 02140 [ipin] = -1; 02141 } 02142 } 02143 } 02144 } 02145 }
struct s_bb * bb_num_on_edges = NULL [static] |
float** chanx_place_cost_fac [static] |
float ** chany_place_cost_fac [static] |
const float cross_count[50] [static] |
{ 1.0, 1.0, 1.0, 1.0828, 1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493, 1.4974, 1.5455, 1.5937, 1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519, 1.8924, 1.9288, 1.9652, 2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016, 2.2334, 2.2646, 2.2958, 2.3271, 2.3583, 2.3895, 2.4187, 2.4479, 2.4772, 2.5064, 2.5356, 2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671, 2.7933 }
int* duplicate_pins [static] |
int** net_pin_index = NULL [static] |
float* place_region_bounds_x [static] |
float * place_region_bounds_y [static] |
struct s_place_region** place_region_x [static] |
struct s_place_region ** place_region_y [static] |
float** point_to_point_delay_cost = NULL [static] |
float** point_to_point_timing_cost = NULL [static] |
float * temp_net_cost = NULL [static] |
float** temp_point_to_point_delay_cost = NULL [static] |
float** temp_point_to_point_timing_cost = NULL [static] |
int** unique_pin_list [static] |