|
VPR-6.0
|
This graph shows which files directly or indirectly include this file:Go to the source code of this file.
Functions | |
| void | try_place (struct s_placer_opts placer_opts, struct s_annealing_sched annealing_sched, t_chan_width_dist chan_width_dist, struct s_router_opts router_opts, struct s_det_routing_arch det_routing_arch, t_segment_inf *segment_inf, t_timing_inf timing_inf, t_mst_edge ***mst) |
| void try_place | ( | struct s_placer_opts | placer_opts, |
| struct s_annealing_sched | annealing_sched, | ||
| t_chan_width_dist | chan_width_dist, | ||
| struct s_router_opts | router_opts, | ||
| struct s_det_routing_arch | det_routing_arch, | ||
| t_segment_inf * | segment_inf, | ||
| t_timing_inf | timing_inf, | ||
| t_mst_edge *** | mst | ||
| ) |
Does almost all the work of placing a circuit. Width_fac gives the width of the widest channel. Place_cost_exp says what exponent the width should be taken to when calculating costs. This allows a greater bias for anisotropic architectures. Place_cost_type determines which cost function is used. num_regions is used only the place_cost_type is NONLINEAR_CONG.
Definition at line 291 of file place.c.
{
int tot_iter, inner_iter, success_sum;
int move_lim, moves_since_cost_recompute, width_fac;
float t, success_rat, rlim, d_max, est_crit;
float cost, timing_cost, bb_cost, new_bb_cost, new_timing_cost;
float delay_cost, new_delay_cost, place_delay_value;
float inverse_prev_bb_cost, inverse_prev_timing_cost;
float oldt;
double av_cost, av_bb_cost, av_timing_cost, av_delay_cost,
sum_of_squares, std_dev;
float **old_region_occ_x, **old_region_occ_y;
char msg[BUFSIZE];
boolean fixed_pins; /* Can pads move or not? */
int num_connections;
int inet, ipin, outer_crit_iter_count, inner_crit_iter_count,
inner_recompute_limit;
float **net_slack, **net_delay;
float crit_exponent;
float first_rlim, final_rlim, inverse_delta_rlim;
float **remember_net_delay_original_ptr; /*used to free net_delay if it is re-assigned */
int *x_lookup; /* Used to quickly determine valid swap columns */
/* Allocated here because it goes into timing critical code where each memory allocation is expensive */
x_lookup = my_malloc(nx * sizeof(int));
net_delay = net_slack = NULL;
remember_net_delay_original_ptr = NULL; /*prevents compiler warning */
if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE ||
placer_opts.enable_timing_computations)
{
/*do this before the initial placement to avoid messing up the initial placement */
alloc_lookups_and_criticalities(chan_width_dist,
router_opts,
det_routing_arch,
segment_inf,
timing_inf,
&net_delay, &net_slack);
remember_net_delay_original_ptr = net_delay;
/*#define PRINT_LOWER_BOUND */
#ifdef PRINT_LOWER_BOUND
/*print the crit_path, assuming delay between blocks that are*
*block_dist apart*/
if(placer_opts.block_dist <= nx)
place_delay_value =
delta_clb_to_clb[placer_opts.block_dist][0];
else if(placer_opts.block_dist <= ny)
place_delay_value =
delta_clb_to_clb[0][placer_opts.block_dist];
else
place_delay_value = delta_clb_to_clb[nx][ny];
printf("\nLower bound assuming delay of %g\n", place_delay_value);
load_constant_net_delay(net_delay, place_delay_value);
load_timing_graph_net_delays(net_delay);
d_max = load_net_slack(net_slack, 0);
#ifdef CREATE_ECHO_FILES
print_critical_path("Placement_Lower_Bound.echo");
print_sink_delays("Placement_Lower_Bound_Sink_Delays.echo");
#endif /* CREATE_ECHO_FILES */
/*also print sink delays assuming 0 delay between blocks,
* this tells us how much logic delay is on each path */
load_constant_net_delay(net_delay, 0);
load_timing_graph_net_delays(net_delay);
d_max = load_net_slack(net_slack, 0);
#ifdef CREATE_ECHO_FILES
print_sink_delays("Placement_Logic_Sink_Delays.echo");
#endif /* CREATE_ECHO_FILES */
#endif
}
width_fac = placer_opts.place_chan_width;
if(placer_opts.pad_loc_type == FREE)
fixed_pins = FALSE;
else
fixed_pins = TRUE;
init_chan(width_fac, chan_width_dist);
alloc_and_load_placement_structs(placer_opts.place_cost_type,
placer_opts.num_regions,
placer_opts.place_cost_exp,
&old_region_occ_x, &old_region_occ_y,
placer_opts);
initial_placement(placer_opts.pad_loc_type, placer_opts.pad_loc_file);
init_draw_coords((float)width_fac);
/* Storing the number of pins on each type of block makes the swap routine *
* slightly more efficient. */
/* Gets initial cost and loads bounding boxes. */
if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
{
bb_cost = comp_bb_cost(NORMAL, placer_opts.place_cost_type,
placer_opts.num_regions);
crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */
compute_net_pin_index_values();
num_connections = count_connections();
printf
("\nThere are %d point to point connections in this circuit\n\n",
num_connections);
if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE)
{
for(inet = 0; inet < num_nets; inet++)
for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
timing_place_crit[inet][ipin] = 0; /*dummy crit values */
comp_td_costs(&timing_cost, &delay_cost); /*first pass gets delay_cost, which is used
* in criticality computations in the next call
* to comp_td_costs. */
place_delay_value = delay_cost / num_connections; /*used for computing criticalities */
load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets);
}
else
place_delay_value = 0;
if(placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
{
net_delay = point_to_point_delay_cost; /*this keeps net_delay up to date with *
* *the same values that the placer is using *
* *point_to_point_delay_cost is computed each*
* *time that comp_td_costs is called, and is *
* *also updated after any swap is accepted */
}
load_timing_graph_net_delays(net_delay);
d_max = load_net_slack(net_slack, 0);
load_criticalities(placer_opts, net_slack, d_max, crit_exponent);
outer_crit_iter_count = 1;
/*now we can properly compute costs */
comp_td_costs(&timing_cost, &delay_cost); /*also puts proper values into point_to_point_delay_cost */
inverse_prev_timing_cost = 1 / timing_cost;
inverse_prev_bb_cost = 1 / bb_cost;
cost = 1; /*our new cost function uses normalized values of */
/*bb_cost and timing_cost, the value of cost will be reset */
/*to 1 at each temperature when *_TIMING_DRIVEN_PLACE is true */
}
else
{ /*BOUNDING_BOX_PLACE */
cost = bb_cost = comp_bb_cost(NORMAL, placer_opts.place_cost_type,
placer_opts.num_regions);
timing_cost = 0;
delay_cost = 0;
place_delay_value = 0;
outer_crit_iter_count = 0;
num_connections = 0;
d_max = 0;
crit_exponent = 0;
inverse_prev_timing_cost = 0; /*inverses not used */
inverse_prev_bb_cost = 0;
}
move_lim = (int)(annealing_sched.inner_num * pow(num_blocks, 1.3333));
if(placer_opts.inner_loop_recompute_divider != 0)
inner_recompute_limit = (int)(0.5 + (float)move_lim /
(float)placer_opts.
inner_loop_recompute_divider);
else /*don't do an inner recompute */
inner_recompute_limit = move_lim + 1;
/* Sometimes I want to run the router with a random placement. Avoid *
* using 0 moves to stop division by 0 and 0 length vector problems, *
* by setting move_lim to 1 (which is still too small to do any *
* significant optimization). */
if(move_lim <= 0)
move_lim = 1;
rlim = (float)max(nx, ny);
first_rlim = rlim; /*used in timing-driven placement for exponent computation */
final_rlim = 1;
inverse_delta_rlim = 1 / (first_rlim - final_rlim);
t = starting_t(&cost, &bb_cost, &timing_cost,
placer_opts.place_cost_type,
old_region_occ_x, old_region_occ_y,
placer_opts.num_regions, fixed_pins, annealing_sched,
move_lim, rlim, placer_opts.place_algorithm,
placer_opts.timing_tradeoff, inverse_prev_bb_cost,
inverse_prev_timing_cost, &delay_cost);
tot_iter = 0;
moves_since_cost_recompute = 0;
printf
("Initial Placement Cost: %g bb_cost: %g td_cost: %g delay_cost: %g\n\n",
cost, bb_cost, timing_cost, delay_cost);
#ifndef SPEC
printf
("%11s %10s %11s %11s %11s %11s %11s %9s %8s %7s %7s %10s %7s\n",
"T", "Cost", "Av. BB Cost", "Av. TD Cost", "Av Tot Del",
"P to P Del", "d_max", "Ac Rate", "Std Dev", "R limit", "Exp",
"Tot. Moves", "Alpha");
printf
("%11s %10s %11s %11s %11s %11s %11s %9s %8s %7s %7s %10s %7s\n",
"--------", "----------", "-----------", "-----------",
"---------", "----------", "-----", "-------", "-------",
"-------", "-------", "----------", "-----");
#endif
sprintf(msg,
"Initial Placement. Cost: %g BB Cost: %g TD Cost %g Delay Cost: %g "
"\t d_max %g Channel Factor: %d", cost, bb_cost, timing_cost,
delay_cost, d_max, width_fac);
update_screen(MAJOR, msg, PLACEMENT, FALSE);
while(exit_crit(t, cost, annealing_sched) == 0)
{
if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
{
cost = 1;
}
av_cost = 0.;
av_bb_cost = 0.;
av_delay_cost = 0.;
av_timing_cost = 0.;
sum_of_squares = 0.;
success_sum = 0;
if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
{
if(outer_crit_iter_count >=
placer_opts.recompute_crit_iter
|| placer_opts.inner_loop_recompute_divider != 0)
{
#ifdef VERBOSE
printf("Outer Loop Recompute Criticalities\n");
#endif
place_delay_value = delay_cost / num_connections;
if(placer_opts.place_algorithm ==
NET_TIMING_DRIVEN_PLACE)
load_constant_net_delay(net_delay,
place_delay_value, clb_net, num_nets);
/*note, for path_based, the net delay is not updated since it is current,
*because it accesses point_to_point_delay array */
load_timing_graph_net_delays(net_delay);
d_max = load_net_slack(net_slack, 0);
load_criticalities(placer_opts, net_slack, d_max,
crit_exponent);
/*recompute costs from scratch, based on new criticalities */
comp_td_costs(&timing_cost, &delay_cost);
outer_crit_iter_count = 0;
}
outer_crit_iter_count++;
/*at each temperature change we update these values to be used */
/*for normalizing the tradeoff between timing and wirelength (bb) */
inverse_prev_bb_cost = 1 / bb_cost;
inverse_prev_timing_cost = 1 / timing_cost;
}
inner_crit_iter_count = 1;
for(inner_iter = 0; inner_iter < move_lim; inner_iter++)
{
if(try_swap(t, &cost, &bb_cost, &timing_cost,
rlim, placer_opts.place_cost_type,
old_region_occ_x, old_region_occ_y,
placer_opts.num_regions, fixed_pins,
placer_opts.place_algorithm,
placer_opts.timing_tradeoff,
inverse_prev_bb_cost,
inverse_prev_timing_cost, &delay_cost,
x_lookup) == 1)
{
success_sum++;
av_cost += cost;
av_bb_cost += bb_cost;
av_timing_cost += timing_cost;
av_delay_cost += delay_cost;
sum_of_squares += cost * cost;
}
if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|| placer_opts.place_algorithm ==
PATH_TIMING_DRIVEN_PLACE)
{
if(inner_crit_iter_count >= inner_recompute_limit
&& inner_iter != move_lim - 1)
{ /*on last iteration don't recompute */
inner_crit_iter_count = 0;
#ifdef VERBOSE
printf
("Inner Loop Recompute Criticalities\n");
#endif
if(placer_opts.place_algorithm ==
NET_TIMING_DRIVEN_PLACE)
{
place_delay_value =
delay_cost / num_connections;
load_constant_net_delay(net_delay,
place_delay_value, clb_net, num_nets);
}
load_timing_graph_net_delays(net_delay);
d_max = load_net_slack(net_slack, 0);
load_criticalities(placer_opts, net_slack,
d_max, crit_exponent);
comp_td_costs(&timing_cost, &delay_cost);
}
inner_crit_iter_count++;
}
#ifdef VERBOSE
printf
("t = %g cost = %g bb_cost = %g timing_cost = %g move = %d dmax = %g\n",
t, cost, bb_cost, timing_cost, inner_iter, d_max);
if(fabs
(bb_cost -
comp_bb_cost(CHECK, placer_opts.place_cost_type,
placer_opts.num_regions)) >
bb_cost * ERROR_TOL)
exit(1);
#endif
}
/* Lines below prevent too much round-off error from accumulating *
* in the cost over many iterations. This round-off can lead to *
* error checks failing because the cost is different from what *
* you get when you recompute from scratch. */
moves_since_cost_recompute += move_lim;
if(moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE)
{
new_bb_cost =
recompute_bb_cost(placer_opts.place_cost_type,
placer_opts.num_regions);
if(fabs(new_bb_cost - bb_cost) > bb_cost * ERROR_TOL)
{
printf
("Error in try_place: new_bb_cost = %g, old bb_cost = %g.\n",
new_bb_cost, bb_cost);
exit(1);
}
bb_cost = new_bb_cost;
if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|| placer_opts.place_algorithm ==
PATH_TIMING_DRIVEN_PLACE)
{
comp_td_costs(&new_timing_cost, &new_delay_cost);
if(fabs(new_timing_cost - timing_cost) >
timing_cost * ERROR_TOL)
{
printf
("Error in try_place: new_timing_cost = %g, old timing_cost = %g.\n",
new_timing_cost, timing_cost);
exit(1);
}
if(fabs(new_delay_cost - delay_cost) >
delay_cost * ERROR_TOL)
{
printf
("Error in try_place: new_delay_cost = %g, old delay_cost = %g.\n",
new_delay_cost, delay_cost);
exit(1);
}
timing_cost = new_timing_cost;
}
if(placer_opts.place_algorithm == BOUNDING_BOX_PLACE)
{
cost = new_bb_cost;
}
moves_since_cost_recompute = 0;
}
tot_iter += move_lim;
success_rat = ((float)success_sum) / move_lim;
if(success_sum == 0)
{
av_cost = cost;
av_bb_cost = bb_cost;
av_timing_cost = timing_cost;
av_delay_cost = delay_cost;
}
else
{
av_cost /= success_sum;
av_bb_cost /= success_sum;
av_timing_cost /= success_sum;
av_delay_cost /= success_sum;
}
std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);
#ifndef SPEC
printf
("%11.5g %10.6g %11.6g %11.6g %11.6g %11.6g %11.4g %9.4g %8.3g %7.4g %7.4g %10d ",
t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost,
place_delay_value, d_max, success_rat, std_dev, rlim,
crit_exponent, tot_iter);
#endif
oldt = t; /* for finding and printing alpha. */
update_t(&t, std_dev, rlim, success_rat, annealing_sched);
#ifndef SPEC
printf("%7.4g\n", t / oldt);
fflush(stdout);
#endif
sprintf(msg,
"Cost: %g BB Cost %g TD Cost %g Temperature: %g d_max: %g",
cost, bb_cost, timing_cost, t, d_max);
update_screen(MINOR, msg, PLACEMENT, FALSE);
update_rlim(&rlim, success_rat);
if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
{
crit_exponent =
(1 -
(rlim -
final_rlim) * inverse_delta_rlim) *
(placer_opts.td_place_exp_last -
placer_opts.td_place_exp_first) +
placer_opts.td_place_exp_first;
}
#ifdef VERBOSE
dump_clbs();
#endif
}
t = 0; /* freeze out */
av_cost = 0.;
av_bb_cost = 0.;
av_timing_cost = 0.;
sum_of_squares = 0.;
av_delay_cost = 0.;
success_sum = 0;
if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)
{
/*at each temperature change we update these values to be used */
/*for normalizing the tradeoff between timing and wirelength (bb) */
if(outer_crit_iter_count >= placer_opts.recompute_crit_iter ||
placer_opts.inner_loop_recompute_divider != 0)
{
#ifdef VERBOSE
printf("Outer Loop Recompute Criticalities\n");
#endif
place_delay_value = delay_cost / num_connections;
if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE)
load_constant_net_delay(net_delay, place_delay_value, clb_net, num_nets);
load_timing_graph_net_delays(net_delay);
d_max = load_net_slack(net_slack, 0);
load_criticalities(placer_opts, net_slack, d_max,
crit_exponent);
/*recompute criticaliies */
comp_td_costs(&timing_cost, &delay_cost);
outer_crit_iter_count = 0;
}
outer_crit_iter_count++;
inverse_prev_bb_cost = 1 / (bb_cost);
inverse_prev_timing_cost = 1 / (timing_cost);
}
inner_crit_iter_count = 1;
for(inner_iter = 0; inner_iter < move_lim; inner_iter++)
{
if(try_swap(t, &cost, &bb_cost, &timing_cost,
rlim, placer_opts.place_cost_type,
old_region_occ_x, old_region_occ_y,
placer_opts.num_regions, fixed_pins,
placer_opts.place_algorithm,
placer_opts.timing_tradeoff, inverse_prev_bb_cost,
inverse_prev_timing_cost, &delay_cost, x_lookup) == 1)
{
success_sum++;
av_cost += cost;
av_bb_cost += bb_cost;
av_delay_cost += delay_cost;
av_timing_cost += timing_cost;
sum_of_squares += cost * cost;
if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE
|| placer_opts.place_algorithm ==
PATH_TIMING_DRIVEN_PLACE)
{
if(inner_crit_iter_count >= inner_recompute_limit
&& inner_iter != move_lim - 1)
{
inner_crit_iter_count = 0;
#ifdef VERBOSE
printf
("Inner Loop Recompute Criticalities\n");
#endif
if(placer_opts.place_algorithm ==
NET_TIMING_DRIVEN_PLACE)
{
place_delay_value =
delay_cost / num_connections;
load_constant_net_delay(net_delay,
place_delay_value, clb_net, num_nets);
}
load_timing_graph_net_delays(net_delay);
d_max = load_net_slack(net_slack, 0);
load_criticalities(placer_opts, net_slack,
d_max, crit_exponent);
comp_td_costs(&timing_cost, &delay_cost);
}
inner_crit_iter_count++;
}
}
#ifdef VERBOSE
printf("t = %g cost = %g move = %d\n", t, cost, tot_iter);
#endif
}
tot_iter += move_lim;
success_rat = ((float)success_sum) / move_lim;
if(success_sum == 0)
{
av_cost = cost;
av_bb_cost = bb_cost;
av_delay_cost = delay_cost;
av_timing_cost = timing_cost;
}
else
{
av_cost /= success_sum;
av_bb_cost /= success_sum;
av_delay_cost /= success_sum;
av_timing_cost /= success_sum;
}
std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);
#ifndef SPEC
printf
("%11.5g %10.6g %11.6g %11.6g %11.6g %11.6g %11.4g %9.4g %8.3g %7.4g %7.4g %10d \n\n",
t, av_cost, av_bb_cost, av_timing_cost, av_delay_cost,
place_delay_value, d_max, success_rat, std_dev, rlim,
crit_exponent, tot_iter);
#endif
#ifdef VERBOSE
dump_clbs();
#endif
check_place(bb_cost, timing_cost, placer_opts.place_cost_type,
placer_opts.num_regions, placer_opts.place_algorithm,
delay_cost);
if(placer_opts.enable_timing_computations &&
placer_opts.place_algorithm == BOUNDING_BOX_PLACE)
{
/*need this done since the timing data has not been kept up to date*
*in bounding_box mode */
for(inet = 0; inet < num_nets; inet++)
for(ipin = 1; ipin <= clb_net[inet].num_sinks; ipin++)
timing_place_crit[inet][ipin] = 0; /*dummy crit values */
comp_td_costs(&timing_cost, &delay_cost); /*computes point_to_point_delay_cost */
}
if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE ||
placer_opts.enable_timing_computations)
{
net_delay = point_to_point_delay_cost; /*this makes net_delay up to date with *
*the same values that the placer is using*/
load_timing_graph_net_delays(net_delay);
est_crit = load_net_slack(net_slack, 0);
#ifdef CREATE_ECHO_FILES
/* print_sink_delays("placement_sink_delays.echo"); */
print_net_slack("placement_net_slacks.echo", net_slack);
print_critical_path("placement_crit_path.echo");
#endif /* CREATE_ECHO_FILES */
printf("Placement Estimated Crit Path Delay: %g\n\n", est_crit);
}
sprintf(msg,
"Placement. Cost: %g bb_cost: %g td_cost: %g Channel Factor: %d d_max: %g",
cost, bb_cost, timing_cost, width_fac, d_max);
printf
("Placement. Cost: %g bb_cost: %g td_cost: %g delay_cost: %g.\n",
cost, bb_cost, timing_cost, delay_cost);
update_screen(MAJOR, msg, PLACEMENT, FALSE);
#ifdef SPEC
printf("Total moves attempted: %d.0\n", tot_iter);
#endif
if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE ||
placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE ||
placer_opts.enable_timing_computations)
{
net_delay = remember_net_delay_original_ptr;
free_placement_structs(placer_opts.place_cost_type,
placer_opts.num_regions, old_region_occ_x,
old_region_occ_y, placer_opts);
free_lookups_and_criticalities(&net_delay, &net_slack);
}
/* placement is done - find mst of all nets.
* creating mst for each net; this gives me an ordering of sinks
* by which I will direct search (A*) for. */
if(*mst)
{
for(inet = 0; inet < num_nets; inet++)
{
assert((*mst)[inet]);
free((*mst)[inet]);
}
free(*mst);
}
*mst = (t_mst_edge **) my_malloc(sizeof(t_mst_edge *) * num_nets);
for(inet = 0; inet < num_nets; inet++)
{
(*mst)[inet] = get_mst_of_net(inet);
}
free(x_lookup);
}
Here is the call graph for this function:
Here is the caller graph for this function: