|
VPR-6.0
|
This graph shows which files directly or indirectly include this file:Go to the source code of this file.
Functions | |
| void | do_clustering (const t_arch *arch, int num_models, boolean global_clocks, boolean *is_clock, boolean hill_climbing_flag, char *out_fname, boolean timing_driven, enum e_cluster_seed cluster_seed_type, float alpha, float beta, int recompute_timing_after, float block_delay, float intra_cluster_net_delay, float inter_cluster_net_delay, float aspect, boolean allow_unrelated_clustering, boolean allow_early_exit, boolean connection_driven, enum e_packer_algorithm packer_algorithm, boolean hack_no_legal_frac_lut, boolean hack_safe_latch) |
| int | get_cluster_of_block (int blkidx) |
| void do_clustering | ( | const t_arch * | arch, |
| int | num_models, | ||
| boolean | global_clocks, | ||
| boolean * | is_clock, | ||
| boolean | hill_climbing_flag, | ||
| char * | out_fname, | ||
| boolean | timing_driven, | ||
| enum e_cluster_seed | cluster_seed_type, | ||
| float | alpha, | ||
| float | beta, | ||
| int | recompute_timing_after, | ||
| float | block_delay, | ||
| float | intra_cluster_net_delay, | ||
| float | inter_cluster_net_delay, | ||
| float | aspect, | ||
| boolean | allow_unrelated_clustering, | ||
| boolean | allow_early_exit, | ||
| boolean | connection_driven, | ||
| enum e_packer_algorithm | packer_algorithm, | ||
| boolean | hack_no_legal_frac_lut, | ||
| boolean | hack_safe_latch | ||
| ) |
Does the actual work of clustering multiple VPACK_LUT+FF logic blocks into clusters.
note: I allow timing_driven and connection_driven to be simultaneously true in this case, connection_driven is responsible for all clustering decisions but timing analysis is still enabled (useful for viewing the constant delay results) Algorithm employed: 1. Find type that can legally hold block and create cluster with pb info 2. Populate started cluster 3. Repeat 1 until no more blocks need to be clustered
Definition at line 223 of file cluster.c.
{
int istart, i, j, k, m, iblk;
int blocks_since_last_analysis;
int next_blk, prev_blk;
int num_blocks_hill_added;
int num_logical_blocks_clustered;
int max_cluster_size, cur_cluster_size, max_primitive_inputs, cur_primitive_inputs, max_pb_depth, cur_pb_depth;
int *hill_climbing_inputs_avail;
t_block *clb;
t_pb *cur_pb;
int num_clb;
boolean critical_path_minimized;
boolean early_exit;
enum e_block_pack_status block_pack_status;
int *num_used_instances_type, *num_instances_type; /* [0..num_types] Holds array for total number of each cluster_type available */
float **net_slack = NULL;
float num_paths_scaling, distance_scaling;
float crit;
hack_frac_lut_no_legality = hack_no_legal_frac_lut;
hack_force_safe_latch = hack_safe_latch;
/* TODO: This is memory inefficient, fix if causes problems */
clb = my_calloc(num_logical_blocks,sizeof(t_block));
num_clb = 0;
istart = NO_CLUSTER;
/* determine bound on cluster size and primitive input size */
max_cluster_size = 0;
max_primitive_inputs = 0;
max_pb_depth = 0;
indexofcrit = 0;
for(i = 0; i < num_types; i++) {
if(EMPTY_TYPE == &type_descriptors[i])
continue;
cur_cluster_size = get_max_cluster_size(type_descriptors[i].pb_type);
cur_primitive_inputs = get_max_primitive_inputs(type_descriptors[i].pb_type);
cur_pb_depth = get_max_pb_depth(type_descriptors[i].pb_type);
if(cur_cluster_size > max_cluster_size) {
max_cluster_size = cur_cluster_size;
}
if(cur_primitive_inputs > max_primitive_inputs) {
max_primitive_inputs = cur_primitive_inputs;
}
if(cur_pb_depth > max_pb_depth) {
max_pb_depth = cur_pb_depth;
}
}
if(hill_climbing_flag) {
hill_climbing_inputs_avail = (int *) my_calloc (max_cluster_size + 1, sizeof(int));
} else {
hill_climbing_inputs_avail = NULL; /* if used, die hard */
}
/* TODO: make better estimate for nx and ny */
nx = ny = 1;
check_clocks (is_clock);
#if 0
check_for_duplicate_inputs ();
#endif
alloc_and_init_clustering (global_clocks, alpha, beta, max_cluster_size, max_primitive_inputs, max_pb_depth, num_models);
blocks_since_last_analysis = 0;
critical_path_minimized = FALSE;
early_exit = FALSE;
num_logical_blocks_clustered = 0;
num_blocks_hill_added = 0;
num_used_instances_type = (int*)my_calloc (num_types, sizeof (int));
num_instances_type = (int*)my_calloc (num_types, sizeof (int));
assert(max_cluster_size < MAX_SHORT); /* Limit maximum number of elements for each cluster */
if (timing_driven){
net_slack = alloc_and_load_pre_packing_timing_graph(block_delay, inter_cluster_net_delay, arch->models);
load_net_slack(net_slack, 0);
criticality=(float*)my_calloc(num_logical_blocks,sizeof(float));
critindexarray=(int*)my_malloc(num_logical_blocks*sizeof(int));
for(i = 0; i < num_logical_blocks; i++) {
critindexarray[i] = i;
}
/* Calculate criticality based on slacks and tie breakers (# paths, distance from source) */
for(i = 0; i < num_tnodes; i++) {
iblk = tnode[i].block;
num_paths_scaling = SCALE_NUM_PATHS * (float)tnode[i].normalized_total_critical_paths;
distance_scaling = SCALE_DISTANCE_VAL * (float)tnode[i].normalized_T_arr;
crit = (1 - tnode[i].normalized_slack) + num_paths_scaling + distance_scaling;
if(criticality[iblk] < crit) {
criticality[iblk] = crit;
}
}
net_pin_backward_criticality = (float**)my_calloc(num_logical_nets, sizeof(float*));
net_pin_forward_criticality = (float**)my_calloc(num_logical_nets, sizeof(float*));
for(i = 0; i < num_logical_nets; i++) {
net_pin_backward_criticality[i] = (float *)my_calloc(vpack_net[i].num_sinks + 1, sizeof(float));
net_pin_forward_criticality[i] = (float *)my_calloc(vpack_net[i].num_sinks + 1, sizeof(float));
for(j = 0; j <= vpack_net[i].num_sinks; j++) {
net_pin_backward_criticality[i][j] = criticality[vpack_net[i].node_block[j]];
net_pin_forward_criticality[i][j] = criticality[vpack_net[i].node_block[0]];
}
}
heapsort(critindexarray, criticality, num_logical_blocks, 1);
/*print_critical_path("clustering_critical_path.echo");*/
if (cluster_seed_type == VPACK_TIMING){
istart=get_most_critical_unclustered_block();
}
else {/*max input seed*/
printf("get_seed_logical_block_with_most_ext_inputs\n");
istart = get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
}
}
else /*cluster seed is max input (since there is no timing information)*/{
istart = get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
}
while (istart != NO_CLUSTER) {
reset_legalizer_for_cluster(&clb[num_clb]);
/* start a new cluster and reset all stats */
start_new_cluster(packer_algorithm, arch, &clb[num_clb], num_clb, istart, aspect, num_used_instances_type, num_instances_type, num_models, max_cluster_size, max_primitive_inputs);
if(logical_block[istart].type != VPACK_INPAD && logical_block[istart].type != VPACK_OUTPAD)
{
printf("Complex Block %d: %s type %s\n", num_clb, clb[num_clb].name, clb[num_clb].type->name);
fflush(stdout);
}
update_cluster_stats (istart, num_clb, is_clock, global_clocks, alpha, beta, timing_driven, connection_driven);
num_clb++;
num_logical_blocks_clustered ++;
if (timing_driven && !early_exit) {
blocks_since_last_analysis++;
/*it doesn't make sense to do a timing analysis here since there*
*is only one logical_block clustered it would not change anything */
}
/* First try to pack primitive into cluster, select next block based on closest open sibling if available */
cur_pb = logical_block[istart].pb->parent_pb;
prev_blk = NO_CLUSTER;
while (cur_pb) {
if(packer_algorithm == PACK_BRUTE_FORCE) { /* if using brute force approach, look at all possible free blocks in cluster */
cur_pb = clb[num_clb - 1].pb;
}
next_blk = get_logical_block_for_cluster(packer_algorithm,
cur_pb,
is_clock,
allow_unrelated_clustering);
block_pack_status = alloc_and_load_pb(packer_algorithm, cur_pb->pb_graph_node, next_blk, cur_pb, num_models, max_cluster_size, max_primitive_inputs);
if(block_pack_status != BLK_PASSED) {
if(next_blk != NO_CLUSTER && prev_blk != next_blk) {
if(block_pack_status == BLK_FAILED_ROUTE) {
#ifdef DEBUG_FAILED_PACKING_CANDIDATES
printf("NO_ROUTE:%s#%d|%s ", logical_block[next_blk].name, next_blk, logical_block[next_blk].model->name);
#else
printf(".");
#endif
} else {
#ifdef DEBUG_FAILED_PACKING_CANDIDATES
printf("FAILED_CHECK:%s#%d|%s check %d", logical_block[next_blk].name, next_blk, logical_block[next_blk].model->name, block_pack_status);
#else
printf(".");
#endif
}
prev_blk = next_blk;
} else {
prev_blk = NO_CLUSTER;
cur_pb = cur_pb->parent_pb;
}
continue;
} else {
/* Continue packing by filling smallest cluster */
if(hack_frac_lut_no_legality) {
logical_block[next_blk].clb_index = num_clb - 1;
}
cur_pb = logical_block[next_blk].pb->parent_pb;
prev_blk = NO_CLUSTER;
#ifdef DEBUG_FAILED_PACKING_CANDIDATES
printf("PASSED:%s#%d ", logical_block[next_blk].name, next_blk);
#else
printf(".");
#endif
}
update_cluster_stats (next_blk, num_clb - 1, is_clock, global_clocks,
alpha, beta, timing_driven, connection_driven);
num_logical_blocks_clustered ++;
if (timing_driven && !early_exit) {
blocks_since_last_analysis++; /* historically, timing slacks were recomputed after X number of blocks were packed, but this doesn't significantly alter results so I (jluu) did not port the code */
}
}
if(logical_block[istart].type != VPACK_INPAD && logical_block[istart].type != VPACK_OUTPAD)
{
printf("\n");
}
save_cluster_solution();
if(hack_frac_lut_no_legality) {
k = 0;
for(i = 0; i < clb[num_clb-1].pb->pb_graph_node->num_input_ports; i++) {
for(j = 0; j < clb[num_clb-1].pb->pb_graph_node->num_input_pins[i]; j++) {
while(k < num_logical_nets) {
if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && !vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
{
rr_node[clb[num_clb-1].pb->pb_graph_node->input_pins[i][j].pin_count_in_cluster].net_num = k;
k++;
break;
}
k++;
}
}
}
/* check if nets are overused */
while(k < num_logical_nets) {
if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && !vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
{
printf(ERRTAG "Too many input nets used in cluster %s\n", clb[num_clb-1].name);
assert(0);
}
k++;
}
k = 0;
for(i = 0; i < clb[num_clb-1].pb->pb_graph_node->num_output_ports; i++) {
for(j = 0; j < clb[num_clb-1].pb->pb_graph_node->num_output_pins[i]; j++) {
while(k < num_logical_nets) {
for(m = 0; m <= vpack_net[k].num_sinks; m++) {
if(logical_block[vpack_net[k].node_block[m]].clb_index != num_clb - 1) {
break;
}
}
if(clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && (clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] < vpack_net[k].num_sinks + 1))
{
rr_node[clb[num_clb-1].pb->pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num = k;
k++;
break;
}
k++;
}
}
}
while(k < num_logical_nets) {
if(clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && (clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] < vpack_net[k].num_sinks + 1))
{
printf(ERRTAG "Too many output nets used in cluster %s\n", clb[num_clb-1].name);
assert(0);
}
k++;
}
k = 0;
for(i = 0; i < clb[num_clb-1].pb->pb_graph_node->num_clock_ports; i++) {
for(j = 0; j < clb[num_clb-1].pb->pb_graph_node->num_clock_pins[i]; j++) {
while(k < num_logical_nets) {
if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
{
rr_node[clb[num_clb-1].pb->pb_graph_node->clock_pins[i][j].pin_count_in_cluster].net_num = k;
k++;
break;
}
k++;
}
}
}
while(k < num_logical_nets) {
if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
{
printf(ERRTAG "Too many clock nets used in cluster %s\n", clb[num_clb-1].name);
assert(0);
}
k++;
}
}
if (timing_driven){
if (num_blocks_hill_added > 0 && !early_exit) {
blocks_since_last_analysis += num_blocks_hill_added;
}
if (cluster_seed_type == VPACK_TIMING){
istart=get_most_critical_unclustered_block();
}
else { /*max input seed*/
istart = get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
}
}
else /*cluster seed is max input (since there is no timing information)*/
istart=get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
free_pb_stats_recursive (clb[num_clb-1].pb, num_models);
}
if (timing_driven){
free_timing_graph(net_slack);
}
free_cluster_legality_checker();
alloc_and_load_cluster_info (num_clb, clb);
check_clustering (num_clb, clb, is_clock);
output_clustering( clb, num_clb, global_clocks, is_clock, out_fname, FALSE);
#ifdef DUMP_BLIF_ECHO
if(!hack_frac_lut_no_legality) /* must have routing graph before dumping blif */
output_blif( clb, num_clb, global_clocks, is_clock, "post_packing_blif.echo", FALSE);
#endif
if(hill_climbing_flag)
{
free (hill_climbing_inputs_avail);
}
for(i = 0; i < num_clb; i++) {
free_cb(clb[i].pb, num_models);
free(clb[i].name);
free(clb[i].nets);
free(clb[i].pb);
}
free(clb);
free (num_used_instances_type);
free (num_instances_type);
free (unclustered_list_head);
free (memory_pool);
free (net_output_feeds_driving_block_input);
}
Here is the call graph for this function:
Here is the caller graph for this function:| int get_cluster_of_block | ( | int | blkidx | ) |