VPR-6.0
|
Go to the source code of this file.
Functions | |
void | try_place (struct s_placer_opts placer_opts, struct s_annealing_sched annealing_sched, t_chan_width_dist chan_width_dist, struct s_router_opts router_opts, struct s_det_routing_arch det_routing_arch, t_segment_inf *segment_inf, t_timing_inf timing_inf, t_mst_edge ***mst) |
void try_place | ( | struct s_placer_opts | placer_opts, |
struct s_annealing_sched | annealing_sched, | ||
t_chan_width_dist | chan_width_dist, | ||
struct s_router_opts | router_opts, | ||
struct s_det_routing_arch | det_routing_arch, | ||
t_segment_inf * | segment_inf, | ||
t_timing_inf | timing_inf, | ||
t_mst_edge *** | mst | ||
) |
Does almost all the work of placing a circuit. Width_fac gives the width of the widest channel. Place_cost_exp says what exponent the width should be taken to when calculating costs. This allows a greater bias for anisotropic architectures. Place_cost_type determines which cost function is used. num_regions is used only the place_cost_type is NONLINEAR_CONG.
Definition at line 291 of file place.c.
{ int tot_iter, inner_iter, success_sum; int move_lim, moves_since_cost_recompute, width_fac; float t, success_rat, rlim, d_max, est_crit; float cost, timing_cost, bb_cost, new_bb_cost, new_timing_cost; float delay_cost, new_delay_cost, place_delay_value; float inverse_prev_bb_cost, inverse_prev_timing_cost; float oldt; double av_cost, av_bb_cost, av_timing_cost, av_delay_cost, sum_of_squares, std_dev; float **old_region_occ_x, **old_region_occ_y; char msg[BUFSIZE]; boolean fixed_pins; /* Can pads move or not? */ int num_connections; int inet, ipin, outer_crit_iter_count, inner_crit_iter_count, inner_recompute_limit; float **net_slack, **net_delay; float crit_exponent; float first_rlim, final_rlim, inverse_delta_rlim; float **remember_net_delay_original_ptr; /*used to free net_delay if it is re-assigned */ int *x_lookup; /* Used to quickly determine valid swap columns */ /* Allocated here because it goes into timing critical code where each memory allocation is expensive */ x_lookup = my_malloc(nx * sizeof(int)); net_delay = net_slack = NULL; remember_net_delay_original_ptr = NULL; /*prevents compiler warning */ if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || placer_opts.enable_timing_computations) { /*do this before the initial placement to avoid messing up the initial placement */ alloc_lookups_and_criticalities(chan_width_dist, router_opts, det_routing_arch, segment_inf, timing_inf, &net_delay, &net_slack); remember_net_delay_original_ptr = net_delay; /*#define PRINT_LOWER_BOUND */ #ifdef PRINT_LOWER_BOUND /*print the crit_path, assuming delay between blocks that are* *block_dist apart*/ if(placer_opts.block_dist <= nx) place_delay_value = delta_clb_to_clb[placer_opts.block_dist][0]; else if(placer_opts.block_dist <= ny) place_delay_value = delta_clb_to_clb[0][placer_opts.block_dist]; else place_delay_value = delta_clb_to_clb[nx][ny]; printf("\nLower bound assuming delay of %g\n", place_delay_value); load_constant_net_delay(net_delay, place_delay_value); load_timing_graph_net_delays(net_delay); d_max = load_net_slack(net_slack, 0); #ifdef CREATE_ECHO_FILES print_critical_path("Placement_Lower_Bound.echo"); print_sink_delays("Placement_Lower_Bound_Sink_Delays.echo"); #endif /* CREATE_ECHO_FILES */ /*also print sink delays assuming 0 delay between blocks, * this tells us how much logic delay is on each path */ load_constant_net_delay(net_delay, 0); load_timing_graph_net_delays(net_delay); d_max = load_net_slack(net_slack, 0); #ifdef CREATE_ECHO_FILES print_sink_delays("Placement_Logic_Sink_Delays.echo"); #endif /* CREATE_ECHO_FILES */ #endif } width_fac = placer_opts.place_chan_width; if(placer_opts.pad_loc_type == FREE) fixed_pins = FALSE; else fixed_pins = TRUE; init_chan(width_fac, chan_width_dist); alloc_and_load_placement_structs(placer_opts.place_cost_type, placer_opts.num_regions, placer_opts.place_cost_exp, &old_region_occ_x, &old_region_occ_y, placer_opts); initial_placement(placer_opts.pad_loc_type, placer_opts.pad_loc_file); init_draw_coords((float)width_fac); /* Storing the number of pins on each type of block makes the swap routine * * slightly more efficient. */ /* Gets initial cost and loads bounding boxes. */ if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { bb_cost = comp_bb_cost(NORMAL, placer_opts.place_cost_type, placer_opts.num_regions); crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */ compute_net_pin_index_values(); num_connections = count_connections(); printf ("\nThere are %d point to point connections in this circuit\n\n", num_connections); if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) { for(inet = 0; inet < num_nets; inet++) for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) timing_place_crit[inet][ipin] = 0; /*dummy crit values */ comp_td_costs(&timing_cost, &delay_cost); /*first pass gets delay_cost, which is used * in criticality computations in the next call * to comp_td_costs. */ place_delay_value = delay_cost / num_connections; /*used for computing criticalities */ load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets); } else place_delay_value = 0; if(placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { net_delay = point_to_point_delay_cost; /*this keeps net_delay up to date with * * *the same values that the placer is using * * *point_to_point_delay_cost is computed each* * *time that comp_td_costs is called, and is * * *also updated after any swap is accepted */ } load_timing_graph_net_delays(net_delay); d_max = load_net_slack(net_slack, 0); load_criticalities(placer_opts, net_slack, d_max, crit_exponent); outer_crit_iter_count = 1; /*now we can properly compute costs */ comp_td_costs(&timing_cost, &delay_cost); /*also puts proper values into point_to_point_delay_cost */ inverse_prev_timing_cost = 1 / timing_cost; inverse_prev_bb_cost = 1 / bb_cost; cost = 1; /*our new cost function uses normalized values of */ /*bb_cost and timing_cost, the value of cost will be reset */ /*to 1 at each temperature when *_TIMING_DRIVEN_PLACE is true */ } else { /*BOUNDING_BOX_PLACE */ cost = bb_cost = comp_bb_cost(NORMAL, placer_opts.place_cost_type, placer_opts.num_regions); timing_cost = 0; delay_cost = 0; place_delay_value = 0; outer_crit_iter_count = 0; num_connections = 0; d_max = 0; crit_exponent = 0; inverse_prev_timing_cost = 0; /*inverses not used */ inverse_prev_bb_cost = 0; } move_lim = (int)(annealing_sched.inner_num * pow(num_blocks, 1.3333)); if(placer_opts.inner_loop_recompute_divider != 0) inner_recompute_limit = (int)(0.5 + (float)move_lim / (float)placer_opts. inner_loop_recompute_divider); else /*don't do an inner recompute */ inner_recompute_limit = move_lim + 1; /* Sometimes I want to run the router with a random placement. Avoid * * using 0 moves to stop division by 0 and 0 length vector problems, * * by setting move_lim to 1 (which is still too small to do any * * significant optimization). */ if(move_lim <= 0) move_lim = 1; rlim = (float)max(nx, ny); first_rlim = rlim; /*used in timing-driven placement for exponent computation */ final_rlim = 1; inverse_delta_rlim = 1 / (first_rlim - final_rlim); t = starting_t(&cost, &bb_cost, &timing_cost, placer_opts.place_cost_type, old_region_occ_x, old_region_occ_y, placer_opts.num_regions, fixed_pins, annealing_sched, move_lim, rlim, placer_opts.place_algorithm, placer_opts.timing_tradeoff, inverse_prev_bb_cost, inverse_prev_timing_cost, &delay_cost); tot_iter = 0; moves_since_cost_recompute = 0; printf ("Initial Placement Cost: %g bb_cost: %g td_cost: %g delay_cost: %g\n\n", cost, bb_cost, timing_cost, delay_cost); #ifndef SPEC printf ("%11s %10s %11s %11s %11s %11s %11s %9s %8s %7s %7s %10s %7s\n", "T", "Cost", "Av. BB Cost", "Av. TD Cost", "Av Tot Del", "P to P Del", "d_max", "Ac Rate", "Std Dev", "R limit", "Exp", "Tot. Moves", "Alpha"); printf ("%11s %10s %11s %11s %11s %11s %11s %9s %8s %7s %7s %10s %7s\n", "--------", "----------", "-----------", "-----------", "---------", "----------", "-----", "-------", "-------", "-------", "-------", "----------", "-----"); #endif sprintf(msg, "Initial Placement. Cost: %g BB Cost: %g TD Cost %g Delay Cost: %g " "\t d_max %g Channel Factor: %d", cost, bb_cost, timing_cost, delay_cost, d_max, width_fac); update_screen(MAJOR, msg, PLACEMENT, FALSE); while(exit_crit(t, cost, annealing_sched) == 0) { if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { cost = 1; } av_cost = 0.; av_bb_cost = 0.; av_delay_cost = 0.; av_timing_cost = 0.; sum_of_squares = 0.; success_sum = 0; if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { if(outer_crit_iter_count >= placer_opts.recompute_crit_iter || placer_opts.inner_loop_recompute_divider != 0) { #ifdef VERBOSE printf("Outer Loop Recompute Criticalities\n"); #endif place_delay_value = delay_cost / num_connections; if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets); /*note, for path_based, the net delay is not updated since it is current, *because it accesses point_to_point_delay array */ load_timing_graph_net_delays(net_delay); d_max = load_net_slack(net_slack, 0); load_criticalities(placer_opts, net_slack, d_max, crit_exponent); /*recompute costs from scratch, based on new criticalities */ comp_td_costs(&timing_cost, &delay_cost); outer_crit_iter_count = 0; } outer_crit_iter_count++; /*at each temperature change we update these values to be used */ /*for normalizing the tradeoff between timing and wirelength (bb) */ inverse_prev_bb_cost = 1 / bb_cost; inverse_prev_timing_cost = 1 / timing_cost; } inner_crit_iter_count = 1; for(inner_iter = 0; inner_iter < move_lim; inner_iter++) { if(try_swap(t, &cost, &bb_cost, &timing_cost, rlim, placer_opts.place_cost_type, old_region_occ_x, old_region_occ_y, placer_opts.num_regions, fixed_pins, placer_opts.place_algorithm, placer_opts.timing_tradeoff, inverse_prev_bb_cost, inverse_prev_timing_cost, &delay_cost, x_lookup) == 1) { success_sum++; av_cost += cost; av_bb_cost += bb_cost; av_timing_cost += timing_cost; av_delay_cost += delay_cost; sum_of_squares += cost * cost; } if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { if(inner_crit_iter_count >= inner_recompute_limit && inner_iter != move_lim - 1) { /*on last iteration don't recompute */ inner_crit_iter_count = 0; #ifdef VERBOSE printf ("Inner Loop Recompute Criticalities\n"); #endif if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) { place_delay_value = delay_cost / num_connections; load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets); } load_timing_graph_net_delays(net_delay); d_max = load_net_slack(net_slack, 0); load_criticalities(placer_opts, net_slack, d_max, crit_exponent); comp_td_costs(&timing_cost, &delay_cost); } inner_crit_iter_count++; } #ifdef VERBOSE printf ("t = %g cost = %g bb_cost = %g timing_cost = %g move = %d dmax = %g\n", t, cost, bb_cost, timing_cost, inner_iter, d_max); if(fabs (bb_cost - comp_bb_cost(CHECK, placer_opts.place_cost_type, placer_opts.num_regions)) > bb_cost * ERROR_TOL) exit(1); #endif } /* Lines below prevent too much round-off error from accumulating * * in the cost over many iterations. This round-off can lead to * * error checks failing because the cost is different from what * * you get when you recompute from scratch. */ moves_since_cost_recompute += move_lim; if(moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE) { new_bb_cost = recompute_bb_cost(placer_opts.place_cost_type, placer_opts.num_regions); if(fabs(new_bb_cost - bb_cost) > bb_cost * ERROR_TOL) { printf ("Error in try_place: new_bb_cost = %g, old bb_cost = %g.\n", new_bb_cost, bb_cost); exit(1); } bb_cost = new_bb_cost; if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { comp_td_costs(&new_timing_cost, &new_delay_cost); if(fabs(new_timing_cost - timing_cost) > timing_cost * ERROR_TOL) { printf ("Error in try_place: new_timing_cost = %g, old timing_cost = %g.\n", new_timing_cost, timing_cost); exit(1); } if(fabs(new_delay_cost - delay_cost) > delay_cost * ERROR_TOL) { printf ("Error in try_place: new_delay_cost = %g, old delay_cost = %g.\n", new_delay_cost, delay_cost); exit(1); } timing_cost = new_timing_cost; } if(placer_opts.place_algorithm == BOUNDING_BOX_PLACE) { cost = new_bb_cost; } moves_since_cost_recompute = 0; } tot_iter += move_lim; success_rat = ((float)success_sum) / move_lim; if(success_sum == 0) { av_cost = cost; av_bb_cost = bb_cost; av_timing_cost = timing_cost; av_delay_cost = delay_cost; } else { av_cost /= success_sum; av_bb_cost /= success_sum; av_timing_cost /= success_sum; av_delay_cost /= success_sum; } std_dev = get_std_dev(success_sum, sum_of_squares, av_cost); #ifndef SPEC printf ("%11.5g %10.6g %11.6g %11.6g %11.6g %11.6g %11.4g %9.4g %8.3g %7.4g %7.4g %10d ", t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost, place_delay_value, d_max, success_rat, std_dev, rlim, crit_exponent, tot_iter); #endif oldt = t; /* for finding and printing alpha. */ update_t(&t, std_dev, rlim, success_rat, annealing_sched); #ifndef SPEC printf("%7.4g\n", t / oldt); fflush(stdout); #endif sprintf(msg, "Cost: %g BB Cost %g TD Cost %g Temperature: %g d_max: %g", cost, bb_cost, timing_cost, t, d_max); update_screen(MINOR, msg, PLACEMENT, FALSE); update_rlim(&rlim, success_rat); if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { crit_exponent = (1 - (rlim - final_rlim) * inverse_delta_rlim) * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first) + placer_opts.td_place_exp_first; } #ifdef VERBOSE dump_clbs(); #endif } t = 0; /* freeze out */ av_cost = 0.; av_bb_cost = 0.; av_timing_cost = 0.; sum_of_squares = 0.; av_delay_cost = 0.; success_sum = 0; if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { /*at each temperature change we update these values to be used */ /*for normalizing the tradeoff between timing and wirelength (bb) */ if(outer_crit_iter_count >= placer_opts.recompute_crit_iter || placer_opts.inner_loop_recompute_divider != 0) { #ifdef VERBOSE printf("Outer Loop Recompute Criticalities\n"); #endif place_delay_value = delay_cost / num_connections; if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets); load_timing_graph_net_delays(net_delay); d_max = load_net_slack(net_slack, 0); load_criticalities(placer_opts, net_slack, d_max, crit_exponent); /*recompute criticaliies */ comp_td_costs(&timing_cost, &delay_cost); outer_crit_iter_count = 0; } outer_crit_iter_count++; inverse_prev_bb_cost = 1 / (bb_cost); inverse_prev_timing_cost = 1 / (timing_cost); } inner_crit_iter_count = 1; for(inner_iter = 0; inner_iter < move_lim; inner_iter++) { if(try_swap(t, &cost, &bb_cost, &timing_cost, rlim, placer_opts.place_cost_type, old_region_occ_x, old_region_occ_y, placer_opts.num_regions, fixed_pins, placer_opts.place_algorithm, placer_opts.timing_tradeoff, inverse_prev_bb_cost, inverse_prev_timing_cost, &delay_cost, x_lookup) == 1) { success_sum++; av_cost += cost; av_bb_cost += bb_cost; av_delay_cost += delay_cost; av_timing_cost += timing_cost; sum_of_squares += cost * cost; if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { if(inner_crit_iter_count >= inner_recompute_limit && inner_iter != move_lim - 1) { inner_crit_iter_count = 0; #ifdef VERBOSE printf ("Inner Loop Recompute Criticalities\n"); #endif if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) { place_delay_value = delay_cost / num_connections; load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets); } load_timing_graph_net_delays(net_delay); d_max = load_net_slack(net_slack, 0); load_criticalities(placer_opts, net_slack, d_max, crit_exponent); comp_td_costs(&timing_cost, &delay_cost); } inner_crit_iter_count++; } } #ifdef VERBOSE printf("t = %g cost = %g move = %d\n", t, cost, tot_iter); #endif } tot_iter += move_lim; success_rat = ((float)success_sum) / move_lim; if(success_sum == 0) { av_cost = cost; av_bb_cost = bb_cost; av_delay_cost = delay_cost; av_timing_cost = timing_cost; } else { av_cost /= success_sum; av_bb_cost /= success_sum; av_delay_cost /= success_sum; av_timing_cost /= success_sum; } std_dev = get_std_dev(success_sum, sum_of_squares, av_cost); #ifndef SPEC printf ("%11.5g %10.6g %11.6g %11.6g %11.6g %11.6g %11.4g %9.4g %8.3g %7.4g %7.4g %10d \n\n", t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost, place_delay_value, d_max, success_rat, std_dev, rlim, crit_exponent, tot_iter); #endif #ifdef VERBOSE dump_clbs(); #endif check_place(bb_cost, timing_cost, placer_opts.place_cost_type, placer_opts.num_regions, placer_opts.place_algorithm, delay_cost); if(placer_opts.enable_timing_computations && placer_opts.place_algorithm == BOUNDING_BOX_PLACE) { /*need this done since the timing data has not been kept up to date* *in bounding_box mode */ for(inet = 0; inet < num_nets; inet++) for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++) timing_place_crit[inet][ipin] = 0; /*dummy crit values */ comp_td_costs(&timing_cost, &delay_cost); /*computes point_to_point_delay_cost */ } if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || placer_opts.enable_timing_computations) { net_delay = point_to_point_delay_cost; /*this makes net_delay up to date with * *the same values that the placer is using*/ load_timing_graph_net_delays(net_delay); est_crit = load_net_slack(net_slack, 0); #ifdef CREATE_ECHO_FILES /* print_sink_delays("placement_sink_delays.echo"); */ print_net_slack("placement_net_slacks.echo", net_slack); print_critical_path("placement_crit_path.echo"); #endif /* CREATE_ECHO_FILES */ printf("Placement Estimated Crit Path Delay: %g\n\n", est_crit); } sprintf(msg, "Placement. Cost: %g bb_cost: %g td_cost: %g Channel Factor: %d d_max: %g", cost, bb_cost, timing_cost, width_fac, d_max); printf ("Placement. Cost: %g bb_cost: %g td_cost: %g delay_cost: %g.\n", cost, bb_cost, timing_cost, delay_cost); update_screen(MAJOR, msg, PLACEMENT, FALSE); #ifdef SPEC printf("Total moves attempted: %d.0\n", tot_iter); #endif if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || placer_opts.enable_timing_computations) { net_delay = remember_net_delay_original_ptr; free_placement_structs(placer_opts.place_cost_type, placer_opts.num_regions, old_region_occ_x, old_region_occ_y, placer_opts); free_lookups_and_criticalities(&net_delay, &net_slack); } /* placement is done - find mst of all nets. * creating mst for each net; this gives me an ordering of sinks * by which I will direct search (A*) for. */ if(*mst) { for(inet = 0; inet < num_nets; inet++) { assert((*mst)[inet]); free((*mst)[inet]); } free(*mst); } *mst = (t_mst_edge **) my_malloc(sizeof(t_mst_edge *) * num_nets); for(inet = 0; inet < num_nets; inet++) { (*mst)[inet] = get_mst_of_net(inet); } free(x_lookup); }