VPR-6.0
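For orientation, the timing-driven seed ordering built near the top of do_clustering() below reduces to a per-block criticality score in which normalized slack dominates and the SCALE_NUM_PATHS / SCALE_DISTANCE_VAL factors act only as small tie-breakers (contributions on the order of 1e-2 and 1e-4 against a slack term of order 1). A minimal sketch of that computation, assuming the tnode, criticality and critindexarray globals declared in this file:

    /* Sketch only: mirrors the tnode loop in do_clustering(). */
    for (i = 0; i < num_tnodes; i++) {
        iblk = tnode[i].block;
        crit = (1 - tnode[i].normalized_slack)                                     /* slack term, order 1 */
             + SCALE_NUM_PATHS * (float) tnode[i].normalized_total_critical_paths  /* tie-break: # critical paths */
             + SCALE_DISTANCE_VAL * (float) tnode[i].normalized_T_arr;             /* tie-break: distance from sources */
        if (criticality[iblk] < crit)
            criticality[iblk] = crit;        /* a block keeps the max over its tnodes */
    }
    heapsort(critindexarray, criticality, num_logical_blocks, 1);  /* order block indices by criticality for seed selection */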
/**
 * @file
 *
 * Main clustering algorithm
 * Author(s): Vaughn Betz (first revision - VPack), Alexander Marquardt (second revision - T-VPack), Jason Luu (third revision - AAPack)
 * June 8, 2011
 */

#include <stdio.h>
#include <assert.h>
#include <string.h>

#include "util.h"
#include "vpr_types.h"
#include "globals.h"
#include "cluster.h"
#include "heapsort.h"
#include "output_clustering.h"
#include "output_blif.h"
#include "SetupGrid.h"
#include "read_xml_arch_file.h"
#include "cluster_legality.h"
#include "path_delay2.h"
#include "path_delay.h"

#define AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE 30 /**< Max size of the priority queue for candidates that pass the early filter legality test but not the more detailed routing test */

#define SCALE_NUM_PATHS 1e-2    /**< this value is used as a multiplier to assign a
                                 * slightly higher criticality value to nets that
                                 * affect a large number of critical paths versus
                                 * nets that do not have such a broad effect.
                                 * Note that this multiplier is intentionally very
                                 * small compared to the total criticality because
                                 * we want to make sure that vpack_net criticality is
                                 * primarily determined by slacks, with this acting
                                 * only as a tie-breaker between otherwise equal nets
                                 */
#define SCALE_DISTANCE_VAL 1e-4 /**< this value is used as a multiplier to assign a
                                 * slightly higher criticality value to nets that
                                 * are otherwise equal but one is farther
                                 * from sources (the farther one being made slightly
                                 * more critical)
                                 */

enum e_gain_update {GAIN, NO_GAIN};
enum e_feasibility {FEASIBLE, INFEASIBLE};
enum e_gain_type {HILL_CLIMBING, NOT_HILL_CLIMBING};
enum e_removal_policy {REMOVE_CLUSTERED, LEAVE_CLUSTERED};
enum e_net_relation_to_clustered_block {INPUT, OUTPUT};

/** Linked list structure. Stores one integer (iblk). */
struct s_ilink {int iblk; struct s_ilink *next;};

/** 1/MARKED_FRAC is the fraction of nets or blocks that must be
 * marked in order for the brute force (go through the whole
 * data structure linearly) gain update etc. code to be used.
 * This is done for speed only; make MARKED_FRAC whatever
 * number speeds the code up most.
 */
#define MARKED_FRAC 2

/** Keeps a linked list of the unclustered blocks to speed up looking for
 * unclustered blocks with a certain number of *external* inputs.
 * [0..lut_size]. unclustered_list_head[i] points to the head of the
 * list of blocks with i inputs to be hooked up via external interconnect.
 */
static struct s_ilink *unclustered_list_head;
int unclustered_list_head_size;
static struct s_ilink *memory_pool; /**< Declared here so I can free easily. */

/** Does the logical_block that drives the output of this vpack_net also appear as a
 * receiver (input) pin of the vpack_net? [0..num_logical_nets-1]. If so, then by how much? This is used
 * in the gain routines to avoid double counting the connections from
 * the current cluster to other blocks (hence yielding better
 * clusterings). The only time a logical_block should connect to the same vpack_net
 * twice is when one connection is an output and the other is an input,
 * so this should take care of all multiple connections.
 */
static int *net_output_feeds_driving_block_input;

static int hack_frac_lut_no_legality;
static int hack_force_safe_latch;

static int indexofcrit; /**< index of next most timing critical block */

/**@{*/
/** Timing information for blocks */
static float *criticality = NULL;
static int *critindexarray = NULL;
/**@}*/

static float **net_pin_backward_criticality = NULL;
static float **net_pin_forward_criticality = NULL;


/*****************************************/
/* local functions                       */
/*****************************************/

static int num_ext_inputs (int iblk);
static void check_clocks (boolean *is_clock);

static int get_max_cluster_size(t_pb_type *pb_type);
static int get_max_primitive_inputs(t_pb_type *pb_type);
static int get_max_pb_depth(t_pb_type *pb_type);

#if 0
static void check_for_duplicate_inputs ();
#endif
static boolean is_logical_blk_in_pb(int iblk, t_pb *pb);

static void add_blk_to_pb_stats_candidates(int iblk, float *gain, t_pb *pb);

static void alloc_and_init_clustering (boolean global_clocks, float alpha, float beta, int max_cluster_size, int max_primitive_inputs, int max_pb_depth, int max_models);
static void free_pb_stats_recursive (t_pb *pb, int max_models);
static boolean outputs_clocks_and_models_feasible (enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb);
static boolean models_feasible(enum e_packer_algorithm packer_algorithm, int iblk, const t_pb_type *cur_pb_type, t_pb *cur_pb, int mode);
static boolean primitive_feasible(int iblk, t_pb *cur_pb);
static boolean primitive_type_feasible(int iblk, const t_pb_type *cur_pb_type, t_pb *memory_class_pb, int sibling_memory_blk);

static int get_logical_block_by_num_ext_inputs (INP enum e_packer_algorithm packer_algorithm,
        INOUTP t_pb *cur_pb,
        INP int ext_inps,
        INP enum e_removal_policy remove_flag);

static int get_free_logical_block_with_most_ext_inputs_for_cluster (INP enum e_packer_algorithm packer_algorithm,
        INOUTP t_pb *cur_pb);

static int get_seed_logical_block_with_most_ext_inputs (int max_primitive_inputs);
static void alloc_and_load_pb_stats (t_pb *pb, int max_models, int max_cluster_size, int max_primitive_inputs);

static enum e_block_pack_status alloc_and_load_pb(INP enum e_packer_algorithm packer_algorithm, INP t_pb_graph_node *pb_graph_node, INP int iblock, INOUTP t_pb * pb, INP int max_models, INP int max_cluster_size, INP int max_primitive_inputs);

static void update_connection_gain_values (int inet, int clustered_block, t_pb * cur_pb,
        enum e_net_relation_to_clustered_block net_relation_to_clustered_block);

static void update_length_gain_values (int inet, int clustered_block, t_pb* cur_pb,
        enum e_net_relation_to_clustered_block net_relation_to_clustered_block);

static void mark_and_update_partial_gain (int inet,
        enum e_gain_update gain_flag,
        int clustered_block,
        int port_on_clustered_block,
        int pin_on_clustered_block,
        boolean timing_driven,
        boolean
        connection_driven,
        enum e_net_relation_to_clustered_block net_relation_to_clustered_block);

static void update_total_gain(float alpha, float beta, boolean timing_driven, boolean
        connection_driven, boolean global_clocks, t_pb *pb);

static void update_cluster_stats ( INP int new_blk,
        INP int clb_index,
        INP boolean *is_clock,
        INP boolean global_clocks,
        INP float alpha,
        INP float beta,
        INP boolean timing_driven,
        INP boolean connection_driven);

static void start_new_cluster(INP enum e_packer_algorithm packer_algorithm,
        INP const t_arch * arch,
        INOUTP t_block *new_cluster,
        INP int clb_index,
        INP int istart,
        INP float aspect,
        INOUTP int *num_used_instances_type,
        INOUTP int *num_instances_type,
        INP int num_models,
        INP int max_cluster_size,
        INP int max_primitive_inputs);

static boolean inputs_outputs_models_and_clocks_feasible (INP enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb);

static int get_highest_gain_logical_block (INP enum e_packer_algorithm packer_algorithm,
        INOUTP t_pb *cur_pb,
        INP boolean *is_clock,
        INP boolean (*is_feasible)
            (enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb),
        INP enum e_gain_type gain_mode);

static int get_logical_block_for_cluster (INP enum e_packer_algorithm packer_algorithm,
        INP t_pb *cur_pb,
        INP boolean *is_clock,
        INP boolean allow_unrelated_clustering);

static int get_free_logical_block_with_fewest_ext_inputs (INP enum e_packer_algorithm packer_algorithm,
        INOUTP t_pb *cur_pb);

static int get_most_feasible_logical_block (INOUTP t_pb *cur_pb);

static void alloc_and_load_cluster_info (INP int num_clb, INOUTP t_block *clb);

static void check_clustering (int num_clb, t_block *clb, boolean *is_clock);

static void check_cluster_logical_blocks (t_pb *pb, boolean *blocks_checked);

static int get_most_critical_unclustered_block(void);

static void free_cb (t_pb *pb, int max_models);
static void free_pb_stats(t_pb_stats pb_stats, int max_models);
static void free_pb (t_pb *pb, int max_models);

/*****************************************/
/* globally accessible function          */

/** Does the actual work of clustering multiple VPACK_LUT+FF logic blocks
 * into clusters.
 *
 * note: timing_driven and connection_driven are allowed to be simultaneously true;
 * in that case, connection_driven is responsible for all clustering decisions,
 * but timing analysis is still enabled (useful for viewing the constant delay
 * results)
 * Algorithm employed:
 * 1. Find type that can legally hold block and create cluster with pb info
 * 2. Populate started cluster
 * 3.
Repeat 1 until no more blocks need to be clustered 00222 */ 00223 void do_clustering (const t_arch *arch, int num_models, boolean global_clocks, 00224 boolean *is_clock, boolean hill_climbing_flag, 00225 char *out_fname, boolean timing_driven, 00226 enum e_cluster_seed cluster_seed_type, float alpha, float beta, 00227 int recompute_timing_after, float block_delay, 00228 float intra_cluster_net_delay, float inter_cluster_net_delay, 00229 float aspect, 00230 boolean allow_unrelated_clustering, 00231 boolean allow_early_exit, boolean connection_driven, 00232 enum e_packer_algorithm packer_algorithm, 00233 boolean hack_no_legal_frac_lut, boolean hack_safe_latch){ 00234 00235 00236 int istart, i, j, k, m, iblk; 00237 int blocks_since_last_analysis; 00238 int next_blk, prev_blk; 00239 00240 int num_blocks_hill_added; 00241 int num_logical_blocks_clustered; 00242 int max_cluster_size, cur_cluster_size, max_primitive_inputs, cur_primitive_inputs, max_pb_depth, cur_pb_depth; 00243 00244 int *hill_climbing_inputs_avail; 00245 00246 t_block *clb; 00247 t_pb *cur_pb; 00248 int num_clb; 00249 00250 boolean critical_path_minimized; 00251 boolean early_exit; 00252 00253 enum e_block_pack_status block_pack_status; 00254 00255 int *num_used_instances_type, *num_instances_type; /* [0..num_types] Holds array for total number of each cluster_type available */ 00256 float **net_slack = NULL; 00257 float num_paths_scaling, distance_scaling; 00258 00259 float crit; 00260 00261 hack_frac_lut_no_legality = hack_no_legal_frac_lut; 00262 hack_force_safe_latch = hack_safe_latch; 00263 00264 /* TODO: This is memory inefficient, fix if causes problems */ 00265 clb = my_calloc(num_logical_blocks,sizeof(t_block)); 00266 num_clb = 0; 00267 istart = NO_CLUSTER; 00268 00269 /* determine bound on cluster size and primitive input size */ 00270 max_cluster_size = 0; 00271 max_primitive_inputs = 0; 00272 max_pb_depth = 0; 00273 00274 indexofcrit = 0; 00275 00276 for(i = 0; i < num_types; i++) { 00277 if(EMPTY_TYPE == &type_descriptors[i]) 00278 continue; 00279 cur_cluster_size = get_max_cluster_size(type_descriptors[i].pb_type); 00280 cur_primitive_inputs = get_max_primitive_inputs(type_descriptors[i].pb_type); 00281 cur_pb_depth = get_max_pb_depth(type_descriptors[i].pb_type); 00282 if(cur_cluster_size > max_cluster_size) { 00283 max_cluster_size = cur_cluster_size; 00284 } 00285 if(cur_primitive_inputs > max_primitive_inputs) { 00286 max_primitive_inputs = cur_primitive_inputs; 00287 } 00288 if(cur_pb_depth > max_pb_depth) { 00289 max_pb_depth = cur_pb_depth; 00290 } 00291 } 00292 00293 if(hill_climbing_flag) { 00294 hill_climbing_inputs_avail = (int *) my_calloc (max_cluster_size + 1, sizeof(int)); 00295 } else { 00296 hill_climbing_inputs_avail = NULL; /* if used, die hard */ 00297 } 00298 00299 /* TODO: make better estimate for nx and ny */ 00300 nx = ny = 1; 00301 00302 check_clocks (is_clock); 00303 #if 0 00304 check_for_duplicate_inputs (); 00305 #endif 00306 alloc_and_init_clustering (global_clocks, alpha, beta, max_cluster_size, max_primitive_inputs, max_pb_depth, num_models); 00307 00308 blocks_since_last_analysis = 0; 00309 critical_path_minimized = FALSE; 00310 early_exit = FALSE; 00311 num_logical_blocks_clustered = 0; 00312 num_blocks_hill_added = 0; 00313 num_used_instances_type = (int*)my_calloc (num_types, sizeof (int)); 00314 num_instances_type = (int*)my_calloc (num_types, sizeof (int)); 00315 00316 assert(max_cluster_size < MAX_SHORT); /* Limit maximum number of elements for each cluster */ 00317 00318 if 
(timing_driven){ 00319 net_slack = alloc_and_load_pre_packing_timing_graph(block_delay, inter_cluster_net_delay, arch->models); 00320 load_net_slack(net_slack, 0); 00321 00322 criticality=(float*)my_calloc(num_logical_blocks,sizeof(float)); 00323 00324 critindexarray=(int*)my_malloc(num_logical_blocks*sizeof(int)); 00325 00326 for(i = 0; i < num_logical_blocks; i++) { 00327 critindexarray[i] = i; 00328 } 00329 00330 /* Calculate criticality based on slacks and tie breakers (# paths, distance from source) */ 00331 for(i = 0; i < num_tnodes; i++) { 00332 iblk = tnode[i].block; 00333 num_paths_scaling = SCALE_NUM_PATHS * (float)tnode[i].normalized_total_critical_paths; 00334 distance_scaling = SCALE_DISTANCE_VAL * (float)tnode[i].normalized_T_arr; 00335 crit = (1 - tnode[i].normalized_slack) + num_paths_scaling + distance_scaling; 00336 if(criticality[iblk] < crit) { 00337 criticality[iblk] = crit; 00338 } 00339 } 00340 00341 net_pin_backward_criticality = (float**)my_calloc(num_logical_nets, sizeof(float*)); 00342 net_pin_forward_criticality = (float**)my_calloc(num_logical_nets, sizeof(float*)); 00343 for(i = 0; i < num_logical_nets; i++) { 00344 net_pin_backward_criticality[i] = (float *)my_calloc(vpack_net[i].num_sinks + 1, sizeof(float)); 00345 net_pin_forward_criticality[i] = (float *)my_calloc(vpack_net[i].num_sinks + 1, sizeof(float)); 00346 for(j = 0; j <= vpack_net[i].num_sinks; j++) { 00347 net_pin_backward_criticality[i][j] = criticality[vpack_net[i].node_block[j]]; 00348 net_pin_forward_criticality[i][j] = criticality[vpack_net[i].node_block[0]]; 00349 } 00350 } 00351 00352 heapsort(critindexarray, criticality, num_logical_blocks, 1); 00353 00354 /*print_critical_path("clustering_critical_path.echo");*/ 00355 00356 00357 if (cluster_seed_type == VPACK_TIMING){ 00358 istart=get_most_critical_unclustered_block(); 00359 } 00360 else {/*max input seed*/ 00361 printf("get_seed_logical_block_with_most_ext_inputs\n"); 00362 istart = get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs); 00363 } 00364 } 00365 else /*cluster seed is max input (since there is no timing information)*/{ 00366 istart = get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs); 00367 } 00368 00369 while (istart != NO_CLUSTER) { 00370 00371 reset_legalizer_for_cluster(&clb[num_clb]); 00372 00373 /* start a new cluster and reset all stats */ 00374 start_new_cluster(packer_algorithm, arch, &clb[num_clb], num_clb, istart, aspect, num_used_instances_type, num_instances_type, num_models, max_cluster_size, max_primitive_inputs); 00375 if(logical_block[istart].type != VPACK_INPAD && logical_block[istart].type != VPACK_OUTPAD) 00376 { 00377 printf("Complex Block %d: %s type %s\n", num_clb, clb[num_clb].name, clb[num_clb].type->name); 00378 fflush(stdout); 00379 } 00380 update_cluster_stats (istart, num_clb, is_clock, global_clocks, alpha, beta, timing_driven, connection_driven); 00381 num_clb++; 00382 00383 00384 num_logical_blocks_clustered ++; 00385 00386 if (timing_driven && !early_exit) { 00387 blocks_since_last_analysis++; 00388 /*it doesn't make sense to do a timing analysis here since there* 00389 *is only one logical_block clustered it would not change anything */ 00390 } 00391 00392 /* First try to pack primitive into cluster, select next block based on closest open sibling if available */ 00393 cur_pb = logical_block[istart].pb->parent_pb; 00394 prev_blk = NO_CLUSTER; 00395 00396 while (cur_pb) { 00397 00398 if(packer_algorithm == PACK_BRUTE_FORCE) { /* if using brute force approach, look 
at all possible free blocks in cluster */ 00399 cur_pb = clb[num_clb - 1].pb; 00400 } 00401 00402 next_blk = get_logical_block_for_cluster(packer_algorithm, 00403 cur_pb, 00404 is_clock, 00405 allow_unrelated_clustering); 00406 00407 block_pack_status = alloc_and_load_pb(packer_algorithm, cur_pb->pb_graph_node, next_blk, cur_pb, num_models, max_cluster_size, max_primitive_inputs); 00408 if(block_pack_status != BLK_PASSED) { 00409 if(next_blk != NO_CLUSTER && prev_blk != next_blk) { 00410 if(block_pack_status == BLK_FAILED_ROUTE) { 00411 #ifdef DEBUG_FAILED_PACKING_CANDIDATES 00412 printf("NO_ROUTE:%s#%d|%s ", logical_block[next_blk].name, next_blk, logical_block[next_blk].model->name); 00413 #else 00414 printf("."); 00415 #endif 00416 } else { 00417 #ifdef DEBUG_FAILED_PACKING_CANDIDATES 00418 printf("FAILED_CHECK:%s#%d|%s check %d", logical_block[next_blk].name, next_blk, logical_block[next_blk].model->name, block_pack_status); 00419 #else 00420 printf("."); 00421 #endif 00422 } 00423 prev_blk = next_blk; 00424 } else { 00425 prev_blk = NO_CLUSTER; 00426 cur_pb = cur_pb->parent_pb; 00427 } 00428 continue; 00429 } else { 00430 /* Continue packing by filling smallest cluster */ 00431 if(hack_frac_lut_no_legality) { 00432 logical_block[next_blk].clb_index = num_clb - 1; 00433 } 00434 cur_pb = logical_block[next_blk].pb->parent_pb; 00435 prev_blk = NO_CLUSTER; 00436 #ifdef DEBUG_FAILED_PACKING_CANDIDATES 00437 printf("PASSED:%s#%d ", logical_block[next_blk].name, next_blk); 00438 #else 00439 printf("."); 00440 #endif 00441 } 00442 update_cluster_stats (next_blk, num_clb - 1, is_clock, global_clocks, 00443 alpha, beta, timing_driven, connection_driven); 00444 00445 num_logical_blocks_clustered ++; 00446 00447 if (timing_driven && !early_exit) { 00448 blocks_since_last_analysis++; /* historically, timing slacks were recomputed after X number of blocks were packed, but this doesn't significantly alter results so I (jluu) did not port the code */ 00449 00450 } 00451 } 00452 if(logical_block[istart].type != VPACK_INPAD && logical_block[istart].type != VPACK_OUTPAD) 00453 { 00454 printf("\n"); 00455 } 00456 00457 save_cluster_solution(); 00458 if(hack_frac_lut_no_legality) { 00459 k = 0; 00460 for(i = 0; i < clb[num_clb-1].pb->pb_graph_node->num_input_ports; i++) { 00461 for(j = 0; j < clb[num_clb-1].pb->pb_graph_node->num_input_pins[i]; j++) { 00462 while(k < num_logical_nets) { 00463 if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && !vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0) 00464 { 00465 rr_node[clb[num_clb-1].pb->pb_graph_node->input_pins[i][j].pin_count_in_cluster].net_num = k; 00466 k++; 00467 break; 00468 } 00469 k++; 00470 } 00471 } 00472 } 00473 /* check if nets are overused */ 00474 while(k < num_logical_nets) { 00475 if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && !vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0) 00476 { 00477 printf(ERRTAG "Too many input nets used in cluster %s\n", clb[num_clb-1].name); 00478 assert(0); 00479 } 00480 k++; 00481 } 00482 k = 0; 00483 for(i = 0; i < clb[num_clb-1].pb->pb_graph_node->num_output_ports; i++) { 00484 for(j = 0; j < clb[num_clb-1].pb->pb_graph_node->num_output_pins[i]; j++) { 00485 while(k < num_logical_nets) { 00486 for(m = 0; m <= vpack_net[k].num_sinks; m++) { 00487 if(logical_block[vpack_net[k].node_block[m]].clb_index != num_clb - 1) { 00488 break; 00489 } 00490 } 00491 if(clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && 
(clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] < vpack_net[k].num_sinks + 1)) 00492 { 00493 rr_node[clb[num_clb-1].pb->pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num = k; 00494 k++; 00495 break; 00496 } 00497 k++; 00498 } 00499 } 00500 } 00501 while(k < num_logical_nets) { 00502 if(clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && (clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] < vpack_net[k].num_sinks + 1)) 00503 { 00504 printf(ERRTAG "Too many output nets used in cluster %s\n", clb[num_clb-1].name); 00505 assert(0); 00506 } 00507 k++; 00508 } 00509 k = 0; 00510 for(i = 0; i < clb[num_clb-1].pb->pb_graph_node->num_clock_ports; i++) { 00511 for(j = 0; j < clb[num_clb-1].pb->pb_graph_node->num_clock_pins[i]; j++) { 00512 while(k < num_logical_nets) { 00513 if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0) 00514 { 00515 rr_node[clb[num_clb-1].pb->pb_graph_node->clock_pins[i][j].pin_count_in_cluster].net_num = k; 00516 k++; 00517 break; 00518 } 00519 k++; 00520 } 00521 } 00522 } 00523 while(k < num_logical_nets) { 00524 if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0) 00525 { 00526 printf(ERRTAG "Too many clock nets used in cluster %s\n", clb[num_clb-1].name); 00527 assert(0); 00528 } 00529 k++; 00530 } 00531 } 00532 00533 if (timing_driven){ 00534 if (num_blocks_hill_added > 0 && !early_exit) { 00535 blocks_since_last_analysis += num_blocks_hill_added; 00536 } 00537 00538 if (cluster_seed_type == VPACK_TIMING){ 00539 istart=get_most_critical_unclustered_block(); 00540 } 00541 else { /*max input seed*/ 00542 istart = get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs); 00543 } 00544 } 00545 else /*cluster seed is max input (since there is no timing information)*/ 00546 istart=get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs); 00547 00548 free_pb_stats_recursive (clb[num_clb-1].pb, num_models); 00549 } 00550 00551 if (timing_driven){ 00552 free_timing_graph(net_slack); 00553 } 00554 00555 free_cluster_legality_checker(); 00556 00557 alloc_and_load_cluster_info (num_clb, clb); 00558 00559 check_clustering (num_clb, clb, is_clock); 00560 00561 00562 output_clustering( clb, num_clb, global_clocks, is_clock, out_fname, FALSE); 00563 #ifdef DUMP_BLIF_ECHO 00564 if(!hack_frac_lut_no_legality) /* must have routing graph before dumping blif */ 00565 output_blif( clb, num_clb, global_clocks, is_clock, "post_packing_blif.echo", FALSE); 00566 #endif 00567 00568 if(hill_climbing_flag) 00569 { 00570 free (hill_climbing_inputs_avail); 00571 } 00572 00573 for(i = 0; i < num_clb; i++) { 00574 free_cb(clb[i].pb, num_models); 00575 free(clb[i].name); 00576 free(clb[i].nets); 00577 free(clb[i].pb); 00578 } 00579 free(clb); 00580 00581 free (num_used_instances_type); 00582 free (num_instances_type); 00583 free (unclustered_list_head); 00584 free (memory_pool); 00585 free (net_output_feeds_driving_block_input); 00586 00587 } 00588 00589 /** Returns the number of input pins on this logical_block that must be hooked 00590 * up through external interconnect. That is, the number of input 00591 * pins used - the number which connect (internally) to the outputs. 
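 * (For example, a logical_block with four used input pins, one of which is driven by one of the
 * block's own output nets, has three external inputs.)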
00592 */ 00593 static int num_ext_inputs (int iblk) { 00594 00595 int ext_inps, output_net, ipin, opin; 00596 00597 t_model_ports *port, *out_port; 00598 00599 /* TODO: process to get ext_inps is slow, should cache in lookup table */ 00600 ext_inps = 0; 00601 port = logical_block[iblk].model->inputs; 00602 while(port) { 00603 if(port->is_clock == FALSE) { 00604 for(ipin = 0; ipin < port->size; ipin++) { 00605 if(logical_block[iblk].input_nets[port->index][ipin] != OPEN) { 00606 ext_inps++; 00607 } 00608 out_port = logical_block[iblk].model->outputs; 00609 while(out_port) { 00610 for(opin = 0; opin < out_port->size; opin++) { 00611 output_net = logical_block[iblk].output_nets[out_port->index][opin]; 00612 if(output_net == OPEN) 00613 continue; 00614 /* TODO: I could speed things up a bit by computing the number of inputs * 00615 * and number of external inputs for each logic logical_block at the start of * 00616 * clustering and storing them in arrays. Look into if speed is a * 00617 * problem. */ 00618 00619 if (logical_block[iblk].input_nets[port->index][ipin] == output_net) { 00620 ext_inps--; 00621 break; 00622 } 00623 } 00624 out_port = out_port->next; 00625 } 00626 } 00627 } 00628 port = port->next; 00629 } 00630 00631 assert(ext_inps >= 0); 00632 00633 return (ext_inps); 00634 } 00635 00636 00637 /*****************************************/ 00638 /** Checks that nets used as clock inputs to latches are never also used 00639 * as VPACK_LUT inputs. It's electrically questionable, and more importantly 00640 * would break the clustering code. 00641 */ 00642 static void check_clocks (boolean *is_clock) { 00643 00644 int inet, iblk, ipin; 00645 t_model_ports *port; 00646 00647 for (iblk=0;iblk<num_logical_blocks;iblk++) { 00648 if (logical_block[iblk].type != VPACK_OUTPAD) { 00649 port = logical_block[iblk].model->inputs; 00650 while(port) { 00651 for (ipin = 0; ipin < port->size; ipin++) { 00652 inet = logical_block[iblk].input_nets[port->index][ipin]; 00653 if (inet != OPEN) { 00654 if (is_clock[inet]) { 00655 printf("Error in check_clocks. 
Net %d (%s) is a clock, but " 00656 "also\n" 00657 "\tconnects to a logic block input on logical_block %d (%s).\n", inet, 00658 vpack_net[inet].name, iblk, logical_block[iblk].name); 00659 printf("This would break the current clustering " 00660 "implementation and is electrically\n" 00661 "\tquestionable, so clustering has been aborted.\n"); 00662 exit (1); 00663 } 00664 } 00665 } 00666 port = port->next; 00667 } 00668 } 00669 } 00670 } 00671 00672 static int get_max_cluster_size(t_pb_type *pb_type) { 00673 int i, j; 00674 int max_size, temp_size; 00675 if (pb_type->modes == 0) { 00676 max_size = pb_type->num_pb; 00677 } else { 00678 max_size = 0; 00679 for(i = 0; i < pb_type->num_modes; i++) { 00680 temp_size = 0; 00681 for(j = 0; j < pb_type->modes[i].num_pb_type_children; j++) { 00682 temp_size += pb_type->modes[i].pb_type_children[j].num_pb * 00683 get_max_cluster_size(&pb_type->modes[i].pb_type_children[j]); 00684 } 00685 if(temp_size > max_size) { 00686 max_size = temp_size; 00687 } 00688 } 00689 } 00690 return max_size; 00691 } 00692 00693 00694 static int get_max_primitive_inputs(t_pb_type *pb_type) { 00695 int i, j; 00696 int max_size, temp_size; 00697 if (pb_type->blif_model != NULL) { 00698 max_size = pb_type->num_input_pins; 00699 } else { 00700 max_size = 0; 00701 for(i = 0; i < pb_type->num_modes; i++) { 00702 temp_size = 0; 00703 for(j = 0; j < pb_type->modes[i].num_pb_type_children; j++) { 00704 temp_size = get_max_primitive_inputs(&pb_type->modes[i].pb_type_children[j]); 00705 if(temp_size > max_size) { 00706 max_size = temp_size; 00707 } 00708 } 00709 } 00710 } 00711 return max_size; 00712 } 00713 00714 static int get_max_pb_depth(t_pb_type *pb_type) { 00715 int i, j; 00716 int max_depth, temp_depth; 00717 max_depth = pb_type->depth; 00718 for(i = 0; i < pb_type->num_modes; i++) { 00719 for(j = 0; j < pb_type->modes[i].num_pb_type_children; j++) { 00720 temp_depth = get_max_pb_depth(&pb_type->modes[i].pb_type_children[j]); 00721 if(temp_depth > max_depth) { 00722 max_depth = temp_depth; 00723 } 00724 } 00725 } 00726 return max_depth; 00727 } 00728 00729 /** Determine if logical block is in pb */ 00730 static boolean is_logical_blk_in_pb(int iblk, t_pb *pb) { 00731 t_pb * cur_pb; 00732 cur_pb = logical_block[iblk].pb; 00733 while(cur_pb) { 00734 if(cur_pb == pb) { 00735 return TRUE; 00736 } 00737 cur_pb = cur_pb->parent_pb; 00738 } 00739 return FALSE; 00740 } 00741 00742 /** Add blk to list of feasible blocks sorted according to gain */ 00743 static void add_blk_to_pb_stats_candidates(int iblk, float *gain, t_pb *pb) { 00744 int i, j; 00745 i = 0; 00746 if(i == pb->pb_stats.num_marked_models) { 00747 /* Corresponding model not found, add it */ 00748 pb->pb_stats.num_marked_models++; 00749 pb->pb_stats.num_feasible_blocks[i] = 1; 00750 pb->pb_stats.feasible_blocks[i][0] = iblk; 00751 } else { 00752 /* found matching model, bubble sort */ 00753 if(pb->pb_stats.num_feasible_blocks[i] >= AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE - 1) { 00754 /* maximum size for array, remove smallest gain element and sort */ 00755 if(gain[iblk] > gain[pb->pb_stats.feasible_blocks[i][0]]) { 00756 /* single loop insertion sort */ 00757 for(j = 0; j < pb->pb_stats.num_feasible_blocks[i] - 1; j++) { 00758 if(gain[iblk] <= gain[pb->pb_stats.feasible_blocks[i][j + 1]]) { 00759 pb->pb_stats.feasible_blocks[i][j] = iblk; 00760 break; 00761 } else { 00762 pb->pb_stats.feasible_blocks[i][j] = pb->pb_stats.feasible_blocks[i][j + 1]; 00763 } 00764 } 00765 if(j == pb->pb_stats.num_feasible_blocks[i] - 1) { 
00766 pb->pb_stats.feasible_blocks[i][j] = iblk; 00767 } 00768 } 00769 } else { 00770 /* Expand array and single loop insertion sort */ 00771 for(j = pb->pb_stats.num_feasible_blocks[i] - 1; j >= 0; j--) { 00772 if(gain[pb->pb_stats.feasible_blocks[i][j]] > gain[iblk]) { 00773 pb->pb_stats.feasible_blocks[i][j + 1] = pb->pb_stats.feasible_blocks[i][j]; 00774 } else { 00775 pb->pb_stats.feasible_blocks[i][j + 1] = iblk; 00776 break; 00777 } 00778 } 00779 if(j < 0) { 00780 pb->pb_stats.feasible_blocks[i][0] = iblk; 00781 } 00782 pb->pb_stats.num_feasible_blocks[i]++; 00783 } 00784 } 00785 } 00786 00787 00788 00789 /*****************************************/ 00790 /** Allocates the main data structures used for clustering and properly 00791 * initializes them. 00792 */ 00793 static void alloc_and_init_clustering (boolean global_clocks, 00794 float alpha, float beta, 00795 int max_cluster_size, 00796 int max_primitive_inputs, 00797 int max_pb_depth, 00798 int max_models) { 00799 00800 int i, ext_inps, ipin, driving_blk, inet; 00801 struct s_ilink *next_ptr; 00802 00803 alloc_and_load_cluster_legality_checker(); 00804 00805 for (i=0;i<num_logical_blocks;i++) { 00806 logical_block[i].clb_index = NO_CLUSTER; 00807 } 00808 00809 unclustered_list_head = (struct s_ilink *) my_calloc(max_primitive_inputs + 1, sizeof(struct s_ilink)); 00810 unclustered_list_head_size = max_primitive_inputs + 1; 00811 00812 for (i = 0; i <= max_primitive_inputs; i++) { 00813 unclustered_list_head[i].next = NULL; 00814 } 00815 00816 00817 memory_pool = (struct s_ilink *) my_malloc (num_logical_blocks * sizeof (struct s_ilink)); 00818 next_ptr = memory_pool; 00819 00820 for (i=0;i<num_logical_blocks;i++) { 00821 ext_inps = num_ext_inputs(i); 00822 next_ptr->next = unclustered_list_head[ext_inps].next; 00823 unclustered_list_head[ext_inps].next = next_ptr; 00824 next_ptr->iblk = i; 00825 next_ptr++; 00826 } 00827 00828 00829 net_output_feeds_driving_block_input = (int *) my_malloc ( 00830 num_logical_nets * sizeof (int)); 00831 00832 for (inet=0;inet<num_logical_nets;inet++) { 00833 net_output_feeds_driving_block_input[inet] = 0; 00834 driving_blk = vpack_net[inet].node_block[0]; 00835 for (ipin=1;ipin<=vpack_net[inet].num_sinks;ipin++) { 00836 if (vpack_net[inet].node_block[ipin] == driving_blk) { 00837 net_output_feeds_driving_block_input[inet]++; 00838 } 00839 } 00840 } 00841 } 00842 00843 00844 /*****************************************/ 00845 static void free_pb_stats_recursive (t_pb *pb, int max_models) { 00846 00847 int i, j; 00848 /* Releases all the memory used by clustering data structures. */ 00849 if(pb) { 00850 if(pb->pb_graph_node != NULL) { 00851 if(pb->pb_graph_node->pb_type->num_modes != 0) { 00852 for(i = 0; i < pb->pb_graph_node->pb_type->modes[pb->mode].num_pb_type_children; i++) { 00853 for(j = 0; j < pb->pb_graph_node->pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { 00854 if(pb->child_pbs && pb->child_pbs[i]) { 00855 free_pb_stats_recursive (&pb->child_pbs[i][j], max_models); 00856 } 00857 } 00858 } 00859 } 00860 } 00861 free_pb_stats(pb->pb_stats, max_models); 00862 } 00863 } 00864 00865 00866 /*****************************************/ 00867 /** Checks if logical_block iblk could be adding to the open cluster without 00868 * violating clocking constraints. Returns TRUE if it's OK. Some 00869 * parameters are unused since this function needs the same inter- 00870 * face as the other feasibility checkers. 
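 * (For example, a block whose clock net is not already present in the cluster consumes one of the
 * cluster's clock pins; the check fails as soon as clocks_avail or outputs_avail would drop below zero.)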
00871 */ 00872 static boolean outputs_clocks_and_models_feasible (enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb) { 00873 00874 int inet, depth, clocks_avail; 00875 t_model_ports *port; 00876 int ipin, output_net, outputs_avail; 00877 boolean clocks_feasible, outputs_feasible; 00878 t_pb * temp_pb; 00879 00880 /* Assumptions: 1. Clock network unique, can only connect to clock network */ 00881 /* 2. Logic block output can't internally connect to clocks. */ 00882 clocks_feasible = outputs_feasible = TRUE; 00883 temp_pb = cur_pb; 00884 while(temp_pb && clocks_feasible && outputs_feasible) { 00885 depth = temp_pb->pb_graph_node->pb_type->depth; 00886 00887 clocks_avail = cur_pb->pb_stats.clocks_avail; 00888 if(clocks_avail == NOT_VALID) { 00889 clocks_avail = temp_pb->pb_graph_node->pb_type->num_clock_pins; 00890 } 00891 00892 inet = logical_block[iblk].clock_net; 00893 if (inet != OPEN) { 00894 if (temp_pb->pb_stats.num_pins_of_net_in_pb[inet] == 0) { 00895 clocks_avail--; 00896 } 00897 else if (temp_pb->pb_stats.num_pins_of_net_in_pb[inet] == 1 && 00898 temp_pb->pb_stats.net_output_in_pb[inet]) { 00899 clocks_avail--; 00900 } 00901 } 00902 00903 00904 outputs_avail = temp_pb->pb_stats.outputs_avail; 00905 00906 port = logical_block[iblk].model->outputs; 00907 while(port) { 00908 /* Outputs that connect to internal blocks free up an input pin. */ 00909 for(ipin = 0; ipin < port->size; ipin++) { 00910 output_net = logical_block[iblk].output_nets[port->index][ipin]; 00911 if (output_net != OPEN) { 00912 if(temp_pb->pb_stats.num_pins_of_net_in_pb[output_net] >= vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net]) { 00913 if((temp_pb->pb_stats.num_pins_of_net_in_pb[output_net] != vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net])) { 00914 printf("[INTERNAL ERROR] net %d %s %d != %d\n", output_net, vpack_net[output_net].name, 00915 temp_pb->pb_stats.num_pins_of_net_in_pb[output_net], vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net]); 00916 } 00917 assert(temp_pb->pb_stats.num_pins_of_net_in_pb[output_net] == vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net]); 00918 } else { 00919 outputs_avail--; 00920 } 00921 } 00922 } 00923 port = port->next; 00924 } 00925 00926 port = logical_block[iblk].model->inputs; 00927 while(port) { 00928 if(port->is_clock == TRUE) { 00929 port = port->next; 00930 continue; 00931 } 00932 for (ipin = 0; ipin < port->size; ipin++) { 00933 inet = logical_block[iblk].input_nets[port->index][ipin]; 00934 if(inet != OPEN) { 00935 if(temp_pb->pb_stats.net_output_in_pb[inet] && 00936 temp_pb->pb_stats.num_pins_of_net_in_pb[inet] + net_output_feeds_driving_block_input[inet] == vpack_net[inet].num_sinks) { 00937 outputs_avail++; 00938 } 00939 } 00940 } 00941 port = port->next; 00942 } 00943 00944 clocks_feasible = (clocks_avail >= 0); 00945 outputs_feasible = (outputs_avail >= 0); 00946 00947 temp_pb = temp_pb->parent_pb; 00948 } 00949 00950 if(models_feasible(packer_algorithm, iblk, cur_pb->pb_graph_node->pb_type, cur_pb, cur_pb->mode)) { 00951 if (clocks_feasible && outputs_feasible) 00952 return (TRUE); 00953 else 00954 return (FALSE); 00955 } else { 00956 return FALSE; 00957 } 00958 } 00959 00960 00961 static boolean models_feasible(enum e_packer_algorithm packer_algorithm, int iblk, const t_pb_type *cur_pb_type, t_pb *cur_pb, int mode) { 00962 struct s_linked_vptr *cur_model; 00963 int i, j, k, cur_ptr; 00964 
struct s_ilink *ptr; 00965 boolean feasible; 00966 const t_pb_type *child_pb_type; 00967 00968 cur_model = cur_pb_type->models_contained; 00969 00970 00971 assert(cur_pb == NULL || cur_pb->pb_graph_node->pb_type == cur_pb_type); 00972 assert(cur_pb == NULL || cur_pb->mode == mode); 00973 00974 feasible = FALSE; 00975 while(cur_model) { 00976 if(cur_model->data_vptr == logical_block[iblk].model) { 00977 feasible = TRUE; 00978 break; 00979 } 00980 cur_model = cur_model->next; 00981 } 00982 00983 if(!feasible) { 00984 return FALSE; 00985 } 00986 if(packer_algorithm == PACK_BRUTE_FORCE) { 00987 /* let the brute force packer determine if an empty slot exists */ 00988 return TRUE; 00989 } 00990 00991 feasible = FALSE; 00992 00993 if(cur_pb_type->num_modes == 0) { 00994 if(hack_force_safe_latch) { 00995 /* TODO: Either remove hack or actually make it a feature properly 00996 00997 hack to make the LUT get packed before the LATCH gets packed for cases that the LATCH can be absorbed 00998 for fracturable LUT architectures 00999 */ 01000 if(strcmp(logical_block[iblk].model->name, "latch") == 0) { 01001 i = logical_block[iblk].input_nets[0][0]; 01002 j = vpack_net[i].node_block[0]; 01003 if(logical_block[j].clb_index == NO_CLUSTER && vpack_net[i].num_sinks == 1 && 01004 strcmp(logical_block[j].model->name, "names") == 0) { 01005 cur_ptr = logical_block[j].model->inputs[0].size; 01006 ptr = unclustered_list_head[cur_ptr].next; 01007 while(ptr == NULL && cur_ptr > 0) { 01008 cur_ptr--; 01009 ptr = unclustered_list_head[cur_ptr].next; 01010 } 01011 if (cur_ptr > 2) 01012 return FALSE; 01013 } 01014 } 01015 } 01016 01017 return primitive_type_feasible(iblk, cur_pb_type, NULL, OPEN); 01018 } 01019 01020 for(i = 0; i < cur_pb_type->modes[mode].num_pb_type_children; i++) { 01021 child_pb_type = &cur_pb_type->modes[mode].pb_type_children[i]; 01022 if(cur_pb) { 01023 for(k = 0; k < child_pb_type->num_pb && cur_pb->child_pbs && cur_pb->child_pbs[i]; k++) { 01024 if(cur_pb->child_pbs[i][k].name == NULL) { 01025 break; 01026 } 01027 } 01028 if(k == child_pb_type->num_pb) { 01029 /* no free child */ 01030 continue; 01031 } 01032 } 01033 if(child_pb_type->num_modes == 0) { 01034 feasible = primitive_type_feasible(iblk, &cur_pb_type->modes[mode].pb_type_children[i], NULL, OPEN); 01035 if(feasible) { 01036 return TRUE; 01037 } 01038 } else { 01039 for(j = 0; j < cur_pb_type->modes[mode].pb_type_children[i].num_modes; j++) { 01040 feasible = models_feasible(packer_algorithm, iblk, &cur_pb_type->modes[mode].pb_type_children[i], NULL, j); 01041 if(feasible) { 01042 return TRUE; 01043 } 01044 } 01045 } 01046 } 01047 01048 return FALSE; 01049 } 01050 01051 static boolean primitive_feasible(int iblk, t_pb *cur_pb) { 01052 const t_pb_type *cur_pb_type; 01053 int i; 01054 t_pb *memory_class_pb; /* Used for memory class only, for memories, open pins must be the same among siblings */ 01055 int sibling_memory_blk; 01056 01057 cur_pb_type = cur_pb->pb_graph_node->pb_type; 01058 memory_class_pb = NULL; 01059 sibling_memory_blk = OPEN; 01060 01061 assert(cur_pb_type->num_modes == 0); /* primitive */ 01062 if(cur_pb->logical_block != OPEN) { 01063 /* This pb already has a logical block */ 01064 return FALSE; 01065 } 01066 01067 if(cur_pb_type->class_type == MEMORY_CLASS) { 01068 /* memory class is special, all siblings must share all nets, including open nets, with the exception of data nets */ 01069 /* find sibling if one exists */ 01070 memory_class_pb = cur_pb->parent_pb; 01071 for(i = 0; i < 
cur_pb_type->parent_mode->num_pb_type_children; i++) { 01072 if(memory_class_pb->child_pbs[cur_pb->mode][i].name != NULL && memory_class_pb->child_pbs[cur_pb->mode][i].logical_block != OPEN) { 01073 sibling_memory_blk = memory_class_pb->child_pbs[cur_pb->mode][i].logical_block; 01074 } 01075 } 01076 if(sibling_memory_blk == OPEN) { 01077 memory_class_pb = NULL; 01078 } 01079 } 01080 01081 return primitive_type_feasible(iblk, cur_pb_type, memory_class_pb, sibling_memory_blk); 01082 } 01083 01084 static boolean primitive_type_feasible(int iblk, const t_pb_type *cur_pb_type, t_pb *memory_class_pb, int sibling_memory_blk) { 01085 t_model_ports *port; 01086 int i, j; 01087 boolean second_pass; 01088 01089 /* check if ports are big enough */ 01090 /* for memories, also check that pins are the same with existing siblings */ 01091 port = logical_block[iblk].model->inputs; 01092 second_pass = FALSE; 01093 while(port || !second_pass) { 01094 /* TODO: This is slow if the number of ports are large, fix if becomes a problem */ 01095 if(!port) { 01096 second_pass = TRUE; 01097 port = logical_block[iblk].model->outputs; 01098 } 01099 for(i = 0; i < cur_pb_type->num_ports; i++) { 01100 if(cur_pb_type->ports[i].model_port == port) { 01101 /* TODO: This is slow, I only need to check from 0 if it is a memory block, other blocks I can check from port->size onwards */ 01102 for(j = 0; j < port->size; j++) { 01103 if(port->dir == IN_PORT && !port->is_clock) { 01104 if(memory_class_pb) { 01105 if(strstr(cur_pb_type->ports[i].port_class, "data") != cur_pb_type->ports[i].port_class) { 01106 if(logical_block[iblk].input_nets[port->index][j] != logical_block[sibling_memory_blk].input_nets[port->index][j]) { 01107 return FALSE; 01108 } 01109 } 01110 } 01111 if(logical_block[iblk].input_nets[port->index][j] != OPEN && j >= cur_pb_type->ports[i].num_pins) { 01112 return FALSE; 01113 } 01114 } else if(port->dir == OUT_PORT) { 01115 if(memory_class_pb) { 01116 if(strstr(cur_pb_type->ports[i].port_class, "data") != cur_pb_type->ports[i].port_class) { 01117 if(logical_block[iblk].output_nets[port->index][j] != logical_block[sibling_memory_blk].output_nets[port->index][j]) { 01118 return FALSE; 01119 } 01120 } 01121 } 01122 if(logical_block[iblk].output_nets[port->index][j] != OPEN && j >= cur_pb_type->ports[i].num_pins) { 01123 return FALSE; 01124 } 01125 } else { 01126 assert(port->dir == IN_PORT && port->is_clock); 01127 assert(j == 0); 01128 if(memory_class_pb) { 01129 if(logical_block[iblk].clock_net != logical_block[sibling_memory_blk].clock_net) { 01130 return FALSE; 01131 } 01132 } 01133 if(logical_block[iblk].clock_net != OPEN && j >= cur_pb_type->ports[i].num_pins) { 01134 return FALSE; 01135 } 01136 } 01137 } 01138 break; 01139 } 01140 } 01141 if(i == cur_pb_type->num_ports) { 01142 if((logical_block[iblk].model->inputs != NULL && !second_pass) || 01143 logical_block[iblk].model->outputs != NULL && second_pass) { 01144 /* physical port not found */ 01145 return FALSE; 01146 } 01147 } 01148 if(port) { 01149 port = port->next; 01150 } 01151 } 01152 return TRUE; 01153 } 01154 01155 /*****************************************/ 01156 /** This routine returns a logical_block which has not been clustered, has 01157 * no connection to the current cluster, satisfies the cluster 01158 * clock constraints, is a valid subblock inside the cluster, does not exceed the cluster subblock units available, 01159 * and has ext_inps external inputs. If 01160 * there is no such logical_block it returns NO_CLUSTER. 
Remove_flag 01161 * controls whether or not blocks that have already been clustered 01162 * are removed from the unclustered_list data structures. NB: 01163 * to get a logical_block regardless of clock constraints just set clocks_avail > 0. 01164 */ 01165 static int get_logical_block_by_num_ext_inputs ( INP enum e_packer_algorithm packer_algorithm, 01166 INOUTP t_pb *cur_pb, 01167 INP int ext_inps, 01168 INP enum e_removal_policy remove_flag) { 01169 01170 struct s_ilink *ptr, *prev_ptr; 01171 int ilogical_blk; 01172 01173 prev_ptr = &unclustered_list_head[ext_inps]; 01174 ptr = unclustered_list_head[ext_inps].next; 01175 01176 while (ptr != NULL) { 01177 ilogical_blk = ptr->iblk; 01178 /* TODO: Get better candidate logical block in future, eg. return most timing critical or some other smarter metric */ 01179 if (logical_block[ilogical_blk].clb_index == NO_CLUSTER) { 01180 if (outputs_clocks_and_models_feasible(packer_algorithm, ilogical_blk, NULL, cur_pb)) { 01181 return ilogical_blk; 01182 } 01183 01184 prev_ptr = ptr; 01185 01186 } 01187 01188 else if (remove_flag == REMOVE_CLUSTERED) { 01189 prev_ptr->next = ptr->next; 01190 } 01191 01192 ptr = ptr->next; 01193 } 01194 01195 return NO_CLUSTER; 01196 } 01197 01198 01199 /*****************************************/ 01200 /** This routine is used to find new blocks for clustering when there are no feasible 01201 * blocks with any attraction to the current cluster (i.e. it finds 01202 * blocks which are unconnected from the current cluster). It returns 01203 * the logical_block with the largest number of used inputs that satisfies the 01204 * clocking and number of inputs constraints. If no suitable logical_block is 01205 * found, the routine returns NO_CLUSTER. 01206 */ 01207 static int get_free_logical_block_with_most_ext_inputs_for_cluster (INP enum e_packer_algorithm packer_algorithm, 01208 INOUTP t_pb *cur_pb) { 01209 int ext_inps; 01210 int best_block; 01211 01212 int inputs_avail; 01213 01214 inputs_avail = cur_pb->pb_stats.inputs_avail; 01215 if(inputs_avail == NOT_VALID) { 01216 inputs_avail = cur_pb->pb_graph_node->pb_type->num_input_pins; 01217 } 01218 01219 best_block = NO_CLUSTER; 01220 01221 if(inputs_avail >= unclustered_list_head_size) { 01222 inputs_avail = unclustered_list_head_size - 1; 01223 } 01224 01225 for (ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) { 01226 best_block = get_logical_block_by_num_ext_inputs (packer_algorithm, cur_pb, ext_inps, REMOVE_CLUSTERED); 01227 if (best_block != NO_CLUSTER) { 01228 break; 01229 } 01230 } 01231 return best_block; 01232 } 01233 01234 /*****************************************/ 01235 /** This routine is used to find the first seed logical_block for the clustering. It returns 01236 * the logical_block with the largest number of used inputs that satisfies the 01237 * clocking and number of inputs constraints. If no suitable logical_block is 01238 * found, the routine returns NO_CLUSTER. 
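 * (The search walks unclustered_list_head[] from max_primitive_inputs down to 0, so the first
 * still-unclustered block found is one with the largest number of external inputs.)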
01239 */ 01240 static int get_seed_logical_block_with_most_ext_inputs (int max_primitive_inputs) { 01241 01242 int iblk, ext_inps; 01243 struct s_ilink *ptr, *prev_ptr; 01244 01245 iblk = NO_CLUSTER; 01246 01247 for (ext_inps=max_primitive_inputs;ext_inps>=0;ext_inps--) { 01248 prev_ptr = &unclustered_list_head[ext_inps]; 01249 ptr = unclustered_list_head[ext_inps].next; 01250 01251 while (ptr != NULL) { 01252 iblk = ptr->iblk; 01253 01254 if (logical_block[iblk].clb_index == NO_CLUSTER) { 01255 prev_ptr->next = ptr->next; /* remove blk from list */ 01256 break; 01257 } else { 01258 iblk = NO_CLUSTER; 01259 } 01260 01261 prev_ptr = ptr; 01262 ptr = ptr->next; 01263 } 01264 01265 if (iblk != NO_CLUSTER) 01266 return (iblk); 01267 } 01268 01269 return (NO_CLUSTER); /* No suitable logical_block found. */ 01270 } 01271 01272 01273 /*****************************************/ 01274 /* Call this routine when starting to fill up a new cluster. It resets 01275 * the gain vector, etc. 01276 */ 01277 static void alloc_and_load_pb_stats (t_pb *pb, int max_models, int max_cluster_size, int max_primitive_inputs) { 01278 01279 int j; 01280 01281 /* If statement below is for speed. If nets are reasonably low-fanout, * 01282 * only a relatively small number of blocks will be marked, and updating * 01283 * only those logical_block structures will be fastest. If almost all blocks * 01284 * have been touched it should be faster to just run through them all * 01285 * in order (less addressing and better cache locality). */ 01286 01287 01288 pb->pb_stats.gain = (float *) my_malloc (num_logical_blocks * sizeof (float)); 01289 pb->pb_stats.lengthgain=(float*) my_malloc (num_logical_blocks * sizeof (float)); 01290 pb->pb_stats.sharinggain=(float*) my_malloc (num_logical_blocks * sizeof (float)); 01291 pb->pb_stats.hillgain =(float*) my_malloc (num_logical_blocks * sizeof (float)); 01292 pb->pb_stats.connectiongain = (float*) my_malloc (num_logical_blocks * sizeof (float)); 01293 pb->pb_stats.inputs_avail = NOT_VALID; 01294 pb->pb_stats.outputs_avail = NOT_VALID; 01295 pb->pb_stats.clocks_avail = NOT_VALID; 01296 pb->pb_stats.num_marked_models = 0; 01297 pb->pb_stats.cur_marked_model = NOT_VALID; 01298 pb->pb_stats.num_feasible_blocks = my_calloc(max_models, sizeof(int)); 01299 pb->pb_stats.feasible_blocks = my_malloc(max_models * sizeof(int*)); 01300 01301 01302 for (j=0;j<max_models;j++) { 01303 pb->pb_stats.feasible_blocks[j] = (int*) my_malloc (AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE * sizeof (int)); 01304 } 01305 01306 for (j=0;j<num_logical_blocks;j++) { 01307 pb->pb_stats.gain[j] = 0.0; 01308 pb->pb_stats.lengthgain[j]= 0.0; 01309 pb->pb_stats.connectiongain[j] = 0; 01310 pb->pb_stats.sharinggain[j]=NOT_VALID; 01311 pb->pb_stats.hillgain[j]=NOT_VALID; 01312 } 01313 01314 pb->pb_stats.num_pins_of_net_in_pb = (int *) my_malloc (num_logical_nets * sizeof(int)); 01315 pb->pb_stats.net_output_in_pb = (boolean *) my_malloc (num_logical_nets * sizeof(boolean)); 01316 01317 for (j=0;j<num_logical_nets;j++) { 01318 pb->pb_stats.num_pins_of_net_in_pb[j] = 0; 01319 pb->pb_stats.net_output_in_pb[j] = FALSE; 01320 } 01321 01322 pb->pb_stats.marked_nets = (int *) my_malloc ((max_primitive_inputs + 2) * max_cluster_size * sizeof(int)); 01323 pb->pb_stats.marked_blocks = (int *) my_malloc (num_logical_blocks * sizeof(int)); 01324 01325 pb->pb_stats.num_marked_nets = 0; 01326 pb->pb_stats.num_marked_blocks = 0; 01327 01328 } 01329 /*****************************************/ 01330 01331 /** Allocate space within pb and load data 
for it given the current logical block. 01332 * Note: pb space and parent info must be loaded before calling this function 01333 * 01334 * TODO: add-in some heuristic for determing which pb to select if there are multiple choices 01335 */ 01336 static enum e_block_pack_status alloc_and_load_pb(INP enum e_packer_algorithm packer_algorithm, INP t_pb_graph_node *pb_graph_node, INP int iblock, 01337 INOUTP t_pb * pb, INP int max_models, int max_cluster_size, int max_primitive_inputs) { 01338 int i, j, k; 01339 const t_pb_type *pb_type; 01340 01341 boolean is_primitive; 01342 enum e_block_pack_status block_pack_status; 01343 01344 pb->pb_graph_node = pb_graph_node; 01345 pb->logical_block = NO_CLUSTER; 01346 01347 block_pack_status = BLK_STATUS_UNDEFINED; 01348 01349 if(iblock == NO_CLUSTER) { 01350 return BLK_FAILED_FEASIBLE; 01351 } 01352 01353 pb_type = pb_graph_node->pb_type; 01354 01355 is_primitive = (pb_type->num_modes == 0); 01356 01357 /* TODO: It would be good to save and prune away nodes of types that I know won't fit, that way, I don't have to traverse the tree 01358 for types that don't work, this runtime optimization can be done later */ 01359 if(is_primitive) { 01360 if(!primitive_feasible(iblock, pb)) { 01361 return BLK_FAILED_FEASIBLE; 01362 } 01363 assert(pb->logical_block == OPEN); 01364 /* check if I can add and route this block, if they yes, then success, if not, fail */ 01365 if(hack_frac_lut_no_legality) { 01366 block_pack_status = BLK_PASSED; 01367 pb->logical_block = iblock; 01368 logical_block[iblock].pb = pb; 01369 } else { 01370 block_pack_status = try_add_block_to_current_cluster_and_primitive(iblock, pb); 01371 } 01372 if(block_pack_status == BLK_PASSED) { 01373 pb->name = my_strdup(logical_block[iblock].name); 01374 } 01375 } else { 01376 if(pb->child_pbs == NULL) { 01377 /* first time here so must allocate space and determine mode of operation */ 01378 for(i = 0; i < pb_type->num_modes && block_pack_status != BLK_PASSED; i++) { 01379 pb->mode = i; 01380 if(!models_feasible(packer_algorithm, iblock, pb_type, pb, pb->mode)) { 01381 continue; 01382 } 01383 set_pb_mode(pb, 0, 0); /* default mode is to use mode 1 */ 01384 set_pb_mode(pb, i, 1); 01385 pb->child_pbs = my_calloc(pb_type->modes[i].num_pb_type_children, sizeof(t_pb *)); 01386 01387 for(j = 0; j < pb_type->modes[i].num_pb_type_children; j++) { 01388 /* allocate space for children if first time allocating space */ 01389 pb->child_pbs[j] = my_calloc(pb_type->modes[i].pb_type_children[j].num_pb, sizeof(t_pb)); 01390 for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb; k++) { 01391 pb->child_pbs[j][k].parent_pb = pb; 01392 alloc_and_load_pb_stats(&pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs); 01393 } 01394 } 01395 for(j = 0; j < pb_type->modes[i].num_pb_type_children && block_pack_status != BLK_PASSED; j++) { 01396 for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb && block_pack_status != BLK_PASSED; k++) { 01397 block_pack_status = alloc_and_load_pb(packer_algorithm, &pb_graph_node->child_pb_graph_nodes[i][j][k], iblock, &pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs); 01398 if(block_pack_status == BLK_FAILED_FEASIBLE) { 01399 /* sibling blocks differ only in interconnect, 01400 if feasibility fails then it is failing on something other than interconnect 01401 therefore, no point trying the same thing for siblings */ 01402 break; 01403 } 01404 } 01405 } 01406 if(block_pack_status == BLK_PASSED) { 01407 pb->name = 
my_strdup(logical_block[iblock].name); 01408 } else { 01409 set_pb_mode(pb, i, 0); /* default mode is to use mode 1 */ 01410 set_pb_mode(pb, 0, 1); 01411 for(j = 0; j < pb_type->modes[i].num_pb_type_children; j++) { 01412 if(pb->child_pbs[j] != NULL) { 01413 for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb; k++) { 01414 free_pb_stats(pb->child_pbs[j][k].pb_stats, max_models); 01415 } 01416 free(pb->child_pbs[j]); 01417 } 01418 } 01419 free(pb->child_pbs); 01420 pb->child_pbs = NULL; 01421 } 01422 } 01423 } else { 01424 /* 01425 * Another block has been allocated inside so must select placement carefully 01426 */ 01427 i = pb->mode; 01428 if(!models_feasible(packer_algorithm, iblock, pb_type, pb, pb->mode)) { 01429 block_pack_status = BLK_FAILED_FEASIBLE; 01430 } else { 01431 for(j = 0; j < pb_type->modes[i].num_pb_type_children && block_pack_status != BLK_PASSED; j++) { 01432 for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb && block_pack_status != BLK_PASSED; k++) { 01433 /* Simple heuristic: assume that any packed cluster is well-packed, try to pack into new child only */ 01434 /* Brute force heuristic: traverse whole tree always */ 01435 if(pb->child_pbs[j][k].name == NULL) { 01436 block_pack_status = alloc_and_load_pb(packer_algorithm, &pb_graph_node->child_pb_graph_nodes[i][j][k], iblock, &pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs); 01437 } else if (packer_algorithm == PACK_BRUTE_FORCE && pb_type->modes[i].pb_type_children[j].num_modes != 0) { 01438 block_pack_status = alloc_and_load_pb(packer_algorithm, &pb_graph_node->child_pb_graph_nodes[i][j][k], iblock, &pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs); 01439 } 01440 if(block_pack_status == BLK_FAILED_FEASIBLE && packer_algorithm != PACK_BRUTE_FORCE) { 01441 /* Children blocks differ only in routing, don't continue if feasibility fails 01442 Brute force algorithm may have just ran into a node that was full, therefore it should not break out 01443 */ 01444 break; 01445 } 01446 } 01447 } 01448 } 01449 } 01450 } 01451 01452 return block_pack_status; 01453 } 01454 01455 /** This function is called when the connectiongain values on the vpack_net 01456 * inet require updating. 
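 * (Each unclustered pin on the net receives gain 1/(num_sinks - num_internal_connections + 1 + num_stuck_connections);
 * as more of the net is absorbed into the cluster the denominator shrinks, so nearly-absorbed nets attract their
 * remaining blocks more strongly. The subtraction below removes the contribution made with the previous
 * internal-connection count.)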
*/ 01457 static void update_connection_gain_values (int inet, int clustered_block, t_pb *cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block) { 01458 01459 int iblk, ipin; 01460 int clb_index; 01461 int num_internal_connections, num_open_connections, num_stuck_connections; 01462 01463 num_internal_connections = num_open_connections = num_stuck_connections = 0; 01464 01465 clb_index = logical_block[clustered_block].clb_index; 01466 01467 /* may wish to speed things up by ignoring clock nets since they are high fanout */ 01468 01469 for (ipin=0;ipin<=vpack_net[inet].num_sinks;ipin++) { 01470 iblk = vpack_net[inet].node_block[ipin]; 01471 if (logical_block[iblk].clb_index == clb_index && is_logical_blk_in_pb(iblk, logical_block[clustered_block].pb)) { 01472 num_internal_connections++; 01473 } else if (logical_block[iblk].clb_index == OPEN) { 01474 num_open_connections++; 01475 } else { 01476 num_stuck_connections++; 01477 } 01478 } 01479 01480 if (net_relation_to_clustered_block==OUTPUT){ 01481 for (ipin=1;ipin<=vpack_net[inet].num_sinks;ipin++) { 01482 iblk = vpack_net[inet].node_block[ipin]; 01483 if (logical_block[iblk].clb_index == NO_CLUSTER) { 01484 /* Only works if net has one connection to block, TODO: handle case where net has multi-connection to block */ 01485 if(num_internal_connections > 1) { 01486 cur_pb->pb_stats.connectiongain[iblk] -= 1 /(float)(vpack_net[inet].num_sinks - (num_internal_connections - 1) + 1 + 1 * num_stuck_connections); 01487 } 01488 cur_pb->pb_stats.connectiongain[iblk] += 1 /(float)(vpack_net[inet].num_sinks - num_internal_connections + 1 + 1 * num_stuck_connections); 01489 } 01490 } 01491 } 01492 01493 if(net_relation_to_clustered_block==INPUT){ 01494 /*Calculate the connectiongain for the logical_block which is driving * 01495 *the vpack_net that is an input to a logical_block in the cluster */ 01496 01497 iblk=vpack_net[inet].node_block[0]; 01498 if (logical_block[iblk].clb_index == NO_CLUSTER) { 01499 if(num_internal_connections > 1) { 01500 cur_pb->pb_stats.connectiongain[iblk] -= 1 / (float)(vpack_net[inet].num_sinks - (num_internal_connections - 1) + 1 + 1 * num_stuck_connections); 01501 } 01502 cur_pb->pb_stats.connectiongain[iblk] += 1 / (float)(vpack_net[inet].num_sinks - num_internal_connections + 1 + 1 * num_stuck_connections); 01503 } 01504 } 01505 } 01506 /*****************************************/ 01507 /** This function is called when the length_gain values on the vpack_net 01508 * inet requires updating. 01509 */ 01510 static void update_length_gain_values (int inet, int clustered_block, t_pb *cur_pb, 01511 enum e_net_relation_to_clustered_block net_relation_to_clustered_block) { 01512 01513 float lengain; 01514 int newblk, ifirst; 01515 int iblk, ipin; 01516 01517 /* Check if this vpack_net lists its driving logical_block twice. If so, avoid * 01518 * double counting this logical_block by skipping the first (driving) pin. 
*/ 01519 if (net_output_feeds_driving_block_input[inet] == FALSE) 01520 ifirst = 0; 01521 else 01522 ifirst = 1; 01523 01524 if (net_relation_to_clustered_block == OUTPUT && !vpack_net[inet].is_global){ 01525 for (ipin=ifirst; ipin <= vpack_net[inet].num_sinks; ipin++) { 01526 iblk = vpack_net[inet].node_block[ipin]; 01527 if (logical_block[iblk].clb_index == NO_CLUSTER) { 01528 lengain=net_pin_backward_criticality[inet][ipin]; 01529 if (lengain>cur_pb->pb_stats.lengthgain[iblk]) 01530 cur_pb->pb_stats.lengthgain[iblk]=lengain; 01531 } 01532 } 01533 } 01534 01535 if(net_relation_to_clustered_block == INPUT && !vpack_net[inet].is_global){ 01536 /*Calculate the length gain for the logical_block which is driving * 01537 *the vpack_net that is an input to a logical_block in the cluster */ 01538 newblk = vpack_net[inet].node_block[0]; 01539 if (logical_block[newblk].clb_index == NO_CLUSTER) { 01540 for (ipin=1; ipin <= vpack_net[inet].num_sinks; ipin++) { 01541 lengain=net_pin_forward_criticality[inet][ipin]; 01542 if (lengain>cur_pb->pb_stats.lengthgain[newblk]) 01543 cur_pb->pb_stats.lengthgain[newblk]=lengain; 01544 01545 } 01546 } 01547 } 01548 } 01549 /*****************************************/ 01550 /** Updates the marked data structures, and if gain_flag is GAIN, 01551 * the gain when a logic logical_block is added to a cluster. The 01552 * sharinggain is the number of inputs that a logical_block shares with 01553 * blocks that are already in the cluster. Hillgain is the 01554 * reduction in number of pins-required by adding a logical_block to the 01555 * cluster. The lengthgain is the criticality of the most critical 01556 * vpack_net between this logical_block and a logical_block in the cluster. 01557 */ 01558 static void mark_and_update_partial_gain (int inet, 01559 enum e_gain_update gain_flag, int clustered_block, int port_on_clustered_block, int pin_on_clustered_block, 01560 boolean timing_driven, boolean connection_driven, enum e_net_relation_to_clustered_block net_relation_to_clustered_block) { 01561 01562 01563 int iblk, ipin, ifirst; 01564 t_pb *cur_pb; 01565 01566 cur_pb = logical_block[clustered_block].pb->parent_pb; 01567 01568 while(cur_pb) { 01569 /* Mark vpack_net as being visited, if necessary. */ 01570 01571 if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 0) { 01572 cur_pb->pb_stats.marked_nets[cur_pb->pb_stats.num_marked_nets] = inet; 01573 cur_pb->pb_stats.num_marked_nets++; 01574 } 01575 01576 /* Update gains of affected blocks. */ 01577 01578 if (gain_flag == GAIN) { 01579 01580 /* Check if this vpack_net lists its driving logical_block twice. If so, avoid * 01581 * double counting this logical_block by skipping the first (driving) pin. 
*/ 01582 01583 if (net_output_feeds_driving_block_input[inet] == 0) 01584 ifirst = 0; 01585 else 01586 ifirst = 1; 01587 01588 if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet]==0) { 01589 for (ipin=ifirst;ipin<=vpack_net[inet].num_sinks;ipin++) { 01590 iblk = vpack_net[inet].node_block[ipin]; 01591 if (logical_block[iblk].clb_index == NO_CLUSTER) { 01592 01593 if (cur_pb->pb_stats.sharinggain[iblk] == NOT_VALID) { 01594 cur_pb->pb_stats.marked_blocks[cur_pb->pb_stats.num_marked_blocks] = iblk; 01595 cur_pb->pb_stats.num_marked_blocks++; 01596 cur_pb->pb_stats.sharinggain[iblk]=1; 01597 cur_pb->pb_stats.hillgain[iblk] = 1 - num_ext_inputs (iblk); 01598 } 01599 else { 01600 cur_pb->pb_stats.sharinggain[iblk]++; 01601 cur_pb->pb_stats.hillgain[iblk]++; 01602 } 01603 } 01604 } 01605 } 01606 01607 if (connection_driven) { 01608 update_connection_gain_values(inet, clustered_block, cur_pb, net_relation_to_clustered_block); 01609 } 01610 01611 if (timing_driven) { 01612 update_length_gain_values(inet, clustered_block, cur_pb, net_relation_to_clustered_block); 01613 } 01614 } 01615 01616 cur_pb->pb_stats.num_pins_of_net_in_pb[inet]++; 01617 cur_pb = cur_pb->parent_pb; 01618 } 01619 } 01620 01621 /*****************************************/ 01622 /** Updates the total gain array to reflect the desired tradeoff between 01623 * input sharing (sharinggain) and path_length minimization (lengthgain) 01624 */ 01625 static void update_total_gain(float alpha, float beta, boolean timing_driven, boolean 01626 connection_driven, boolean global_clocks, t_pb *pb){ 01627 01628 int i, iblk, j, k; 01629 t_pb * cur_pb; 01630 int num_input_pins, num_output_pins; 01631 int num_used_input_pins, num_used_output_pins; 01632 t_model_ports *port; 01633 01634 cur_pb = pb; 01635 while(cur_pb) { 01636 01637 for (i=0;i<cur_pb->pb_stats.num_marked_blocks;i++) { 01638 iblk=cur_pb->pb_stats.marked_blocks[i]; 01639 01640 /* Todo: This was used to explore different normalization options, can be made more efficient once we decide on which one to use*/ 01641 num_input_pins = 0; 01642 port = logical_block[iblk].model->inputs; 01643 j =0; 01644 num_used_input_pins = 0; 01645 while(port) { 01646 num_input_pins += port->size; 01647 if(!port->is_clock) { 01648 for(k = 0; k < port->size; k++) { 01649 if(logical_block[iblk].input_nets[j][k] != OPEN) { 01650 num_used_input_pins++; 01651 } 01652 } 01653 j++; 01654 } 01655 port = port->next; 01656 } 01657 if(num_input_pins == 0) { 01658 num_input_pins = 1; 01659 } 01660 01661 num_used_output_pins = 0; 01662 j = 0; 01663 num_output_pins = 0; 01664 port = logical_block[iblk].model->outputs; 01665 while(port) { 01666 num_output_pins += port->size; 01667 for(k = 0; k < port->size; k++) { 01668 if(logical_block[iblk].output_nets[j][k] != OPEN) { 01669 num_used_output_pins++; 01670 } 01671 } 01672 port = port->next; 01673 j++; 01674 } 01675 /* end todo */ 01676 01677 /* Calculate area-only cost function */ 01678 if (connection_driven) { 01679 /*try to absorb as many connections as possible*/ 01680 /* TODO: change back to used pins when done testing */ 01681 cur_pb->pb_stats.gain[iblk] = ((1-beta)*(float)cur_pb->pb_stats.sharinggain[iblk] + beta*(float)cur_pb->pb_stats.connectiongain[iblk])/(num_input_pins + num_output_pins); 01682 /* cur_pb->pb_stats.gain[iblk] = ((1-beta)*(float)cur_pb->pb_stats.sharinggain[iblk] + beta*(float)cur_pb->pb_stats.connectiongain[iblk])/(num_used_input_pins + num_used_output_pins); */ 01683 } else { 01684 /*cur_pb->pb_stats.gain[iblk] = 
((float)cur_pb->pb_stats.sharinggain[iblk])/(num_used_input_pins + num_used_output_pins); */ 01685 cur_pb->pb_stats.gain[iblk] = ((float)cur_pb->pb_stats.sharinggain[iblk])/(num_input_pins + num_output_pins); 01686 } 01687 01688 /* Add in timing driven cost into cost function */ 01689 if (timing_driven) { 01690 cur_pb->pb_stats.gain[iblk] = alpha*cur_pb->pb_stats.lengthgain[iblk] + (1.0-alpha)*(float)cur_pb->pb_stats.gain[iblk]; 01691 } 01692 } 01693 cur_pb = cur_pb->parent_pb; 01694 } 01695 } 01696 01697 /*****************************************/ 01698 /** Updates cluster stats such as gain, used pins, and clock structures. */ 01699 static void update_cluster_stats ( INP int new_blk, 01700 INP int clb_index, 01701 INP boolean *is_clock, 01702 INP boolean global_clocks, 01703 INP float alpha, 01704 INP float beta, 01705 INP boolean timing_driven, 01706 INP boolean connection_driven) { 01707 01708 01709 int i, ipin, inet, depth; 01710 t_model_ports *port; 01711 t_pb *cur_pb; 01712 01713 /* TODO: what a scary comment from Vaughn, we'll have to watch out for this causing problems */ 01714 /* Output can be open so the check is necessary. I don't change * 01715 * the gain for clock outputs when clocks are globally distributed * 01716 * because I assume there is no real need to pack similarly clocked * 01717 * FFs together then. Note that by updating the gain when the * 01718 * clock driver is placed in a cluster implies that the output of * 01719 * LUTs can be connected to clock inputs internally. Probably not * 01720 * true, but it doesn't make much difference, since it will still * 01721 * make local routing of this clock very short, and none of my * 01722 * benchmarks actually generate local clocks (all come from pads). */ 01723 01724 01725 logical_block[new_blk].clb_index = clb_index; 01726 01727 cur_pb = logical_block[new_blk].pb->parent_pb; 01728 while(cur_pb) { 01729 depth = cur_pb->pb_graph_node->pb_type->depth; 01730 /* reset list of feasible blocks */ 01731 for (i=0;i<cur_pb->pb_stats.num_marked_models;i++){ 01732 cur_pb->pb_stats.num_feasible_blocks[i] = 0; 01733 } 01734 cur_pb->pb_stats.num_marked_models = 0; 01735 cur_pb->pb_stats.cur_marked_model = NOT_VALID; 01736 01737 /* determine valid pins */ 01738 if (cur_pb->pb_stats.inputs_avail == NOT_VALID) 01739 cur_pb->pb_stats.inputs_avail = cur_pb->pb_graph_node->pb_type->num_input_pins; 01740 01741 if (cur_pb->pb_stats.outputs_avail == NOT_VALID) 01742 cur_pb->pb_stats.outputs_avail = cur_pb->pb_graph_node->pb_type->num_output_pins; 01743 01744 if (cur_pb->pb_stats.clocks_avail == NOT_VALID) 01745 cur_pb->pb_stats.clocks_avail = cur_pb->pb_graph_node->pb_type->num_clock_pins; 01746 01747 cur_pb = cur_pb->parent_pb; 01748 } 01749 01750 port = logical_block[new_blk].model->outputs; 01751 while(port) { 01752 for(ipin = 0; ipin < port->size; ipin++) { 01753 inet = logical_block[new_blk].output_nets[port->index][ipin]; /* Output pin first. 
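 * Gain is marked for each net driven by the new block (GAIN unless the net is a
 * globally routed clock), and the walk up the pb hierarchy that follows records
 * the net's driver as inside the pb, frees an input pin when the net already has
 * other pins inside (inputs_avail++), and consumes an output pin whenever the
 * net is not completely absorbed (outputs_avail--).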
*/ 01754 if (inet != OPEN) { 01755 if (!is_clock[inet] || !global_clocks) 01756 mark_and_update_partial_gain (inet, GAIN, new_blk, port->index, ipin, 01757 timing_driven, connection_driven, OUTPUT); 01758 else 01759 mark_and_update_partial_gain (inet, NO_GAIN, new_blk, port->index, ipin, 01760 timing_driven, connection_driven, OUTPUT); 01761 01762 cur_pb = logical_block[new_blk].pb->parent_pb; 01763 while(cur_pb) { 01764 depth = cur_pb->pb_graph_node->pb_type->depth; 01765 cur_pb->pb_stats.net_output_in_pb[inet] = TRUE; 01766 01767 if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] > 1 && !is_clock[inet]) 01768 cur_pb->pb_stats.inputs_avail++; 01769 01770 if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] + 01771 net_output_feeds_driving_block_input[inet] != vpack_net[inet].num_sinks + 1) { 01772 cur_pb->pb_stats.outputs_avail--; 01773 } else if (hack_frac_lut_no_legality) { 01774 if(depth > 1) 01775 cur_pb->pb_stats.outputs_avail--; /* frac lut cannot absorb pins after the top two levels */ 01776 } 01777 01778 cur_pb = cur_pb->parent_pb; 01779 } 01780 } 01781 } 01782 port = port->next; 01783 } 01784 port = logical_block[new_blk].model->inputs; 01785 while(port) { 01786 if(port->is_clock) { 01787 port = port->next; 01788 continue; 01789 } 01790 for (ipin=0; ipin<port->size; ipin++) { /* VPACK_BLOCK input pins. */ 01791 01792 inet = logical_block[new_blk].input_nets[port->index][ipin]; 01793 if (inet != OPEN) { 01794 mark_and_update_partial_gain (inet, GAIN, new_blk, port->index, ipin, timing_driven, connection_driven, INPUT); 01795 cur_pb = logical_block[new_blk].pb->parent_pb; 01796 while(cur_pb) { 01797 depth = cur_pb->pb_graph_node->pb_type->depth; 01798 01799 if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 1) 01800 cur_pb->pb_stats.inputs_avail--; 01801 01802 assert(cur_pb->pb_stats.inputs_avail >= 0); 01803 if (cur_pb->pb_stats.net_output_in_pb[inet] && 01804 cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == vpack_net[inet].num_sinks + 1) { 01805 cur_pb->pb_stats.outputs_avail++; 01806 if (hack_frac_lut_no_legality && depth > 1) 01807 cur_pb->pb_stats.outputs_avail--; /* frac lut cannot absorb pins after the top two levels */ 01808 } 01809 01810 cur_pb = cur_pb->parent_pb; 01811 } 01812 } 01813 } 01814 port = port->next; 01815 } 01816 01817 01818 /* Note: The code below ONLY WORKS when nets that go to clock inputs * 01819 * NEVER go to the input of a VPACK_COMB. It doesn't really make electrical * 01820 * sense for that to happen, and I check this in the check_clocks * 01821 * function. Don't disable that sanity check. */ 01822 inet = logical_block[new_blk].clock_net; /* Clock input pin. */ 01823 if (inet != OPEN) { 01824 if (global_clocks) 01825 mark_and_update_partial_gain (inet, NO_GAIN, new_blk, 0, 01826 0, timing_driven, 01827 connection_driven, INPUT); 01828 else 01829 mark_and_update_partial_gain (inet, GAIN, new_blk, 0, 01830 0, timing_driven, 01831 connection_driven, INPUT); 01832 01833 01834 cur_pb = logical_block[new_blk].pb->parent_pb; 01835 while(cur_pb) { 01836 depth = cur_pb->pb_graph_node->pb_type->depth; 01837 01838 if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 1) { 01839 cur_pb->pb_stats.clocks_avail--; 01840 } 01841 else if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 2 && cur_pb->pb_stats.net_output_in_pb[inet]) { 01842 cur_pb->pb_stats.clocks_avail--; 01843 } 01844 /* Note: unlike inputs, I'm currently assuming there is no internal * 01845 * connection in a cluster from VPACK_LUT outputs to clock inputs. 
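 * As a result, clocks_avail is decremented only the first time the clock net
 * appears in the pb (or on its second appearance, when the net's driver is also
 * inside the pb), not once per clocked block.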
*/ 01846 01847 cur_pb = cur_pb->parent_pb; 01848 } 01849 } 01850 01851 update_total_gain(alpha, beta, timing_driven, connection_driven, global_clocks, logical_block[new_blk].pb->parent_pb); 01852 01853 } 01854 01855 01856 /** Given a starting seed block, start_new_cluster determines the next cluster type to use. 01857 * It expands the FPGA if it cannot find a legal cluster for the logical block. 01858 */ 01859 static void start_new_cluster(INP enum e_packer_algorithm packer_algorithm, 01860 INP const t_arch * arch, 01861 INOUTP t_block *new_cluster, 01862 INP int clb_index, 01863 INP int istart, 01864 INP float aspect, 01865 INOUTP int *num_used_instances_type, 01866 INOUTP int *num_instances_type, 01867 INP int num_models, 01868 INP int max_cluster_size, 01869 INP int max_primitive_inputs) { 01870 int i, j; 01871 boolean success; 01872 01873 assert(new_cluster->name == NULL); /* Check if this cluster is really empty */ 01874 01875 /* Allocate a dummy initial cluster and load a logical block as a seed and check if it is legal */ 01876 new_cluster->name = my_malloc(strlen(logical_block[istart].name) + 4); 01877 sprintf(new_cluster->name, "cb.%s", logical_block[istart].name); 01878 new_cluster->nets = NULL; 01879 new_cluster->type = NULL; 01880 new_cluster->pb = NULL; 01881 new_cluster->x = UNDEFINED; 01882 new_cluster->y = UNDEFINED; 01883 new_cluster->z = UNDEFINED; 01884 01885 success = FALSE; 01886 01887 while(!success) { 01888 for(i = 0; i < num_types; i++) { 01889 if(num_used_instances_type[i] < num_instances_type[i]) { 01890 new_cluster->type = &type_descriptors[i]; 01891 if(new_cluster->type == EMPTY_TYPE) { 01892 continue; 01893 } 01894 new_cluster->pb = my_calloc(1, sizeof(t_pb)); 01895 alloc_and_load_pb_stats(new_cluster->pb, num_models, max_cluster_size, max_primitive_inputs); 01896 new_cluster->pb->parent_pb = NULL; 01897 new_cluster->pb->pb_graph_node = new_cluster->type->pb_graph_head; 01898 01899 alloc_and_load_legalizer_for_cluster(new_cluster, clb_index, arch); 01900 for(j = 0; j < new_cluster->type->pb_graph_head->pb_type->num_modes && !success; j++) { 01901 new_cluster->pb->mode = j; 01902 if(models_feasible(packer_algorithm, istart, new_cluster->pb->pb_graph_node->pb_type, new_cluster->pb, new_cluster->pb->mode)) { 01903 success = (BLK_PASSED == alloc_and_load_pb(packer_algorithm, new_cluster->type->pb_graph_head, istart, new_cluster->pb, num_models, max_cluster_size, max_primitive_inputs)); 01904 } 01905 } 01906 if(success) { 01907 01908 if(hack_frac_lut_no_legality) { 01909 logical_block[istart].clb_index = clb_index; 01910 } 01911 /* TODO: For now, just grab any working cluster, in the future, heuristic needed to grab best complex block based on supply and demand */ 01912 break; 01913 } else { 01914 free_legalizer_for_cluster(new_cluster); 01915 free_pb_stats(new_cluster->pb->pb_stats, num_models); 01916 free(new_cluster->pb); 01917 } 01918 } 01919 } 01920 01921 /* Expand FPGA size and recalculate number of available cluster types */ 01922 if (!success) { 01923 if(aspect >= 1.0) 01924 { 01925 ny++; 01926 nx = nint(ny * aspect); 01927 } 01928 else 01929 { 01930 nx++; 01931 ny = nint(nx / aspect); 01932 } 01933 printf("Not enough resources; expanding FPGA size to x = %d y = %d\n", nx, ny); 01934 if((nx > MAX_SHORT) || (ny > MAX_SHORT)) { 01935 printf("Circuit cannot pack into architecture, architecture size (nx = %d, ny = %d) exceeds packer range.\n", nx, ny); 01936 exit(1); 01937 } 01938 alloc_and_load_grid(num_instances_type); 01939 freeGrid(); 01940 } 01941 } 01942
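	/* The loop above grows the device one row or column at a time while preserving the
	 * target aspect ratio (e.g. with aspect = 1.0 and nx = ny = 10, a failure grows the
	 * array to 11 x 11), recounts the available instances of each cluster type for the
	 * enlarged grid, and retries the seed until some cluster type can legally host it. */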
num_used_instances_type[new_cluster->type->index]++; 01943 } 01944 01945 01946 /*****************************************/ 01947 /** Checks if adding iblk to the currently open cluster would violate 01948 * the cluster's input, output, clock pin, and model limitations. Returns TRUE if 01949 * iblk can be added to the cluster, FALSE otherwise. 01950 */ 01951 static boolean inputs_outputs_models_and_clocks_feasible (INP enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb) { 01952 01953 int ipin, inet, output_net; 01954 t_model_ports *port; 01955 t_pb* temp_pb; 01956 boolean inputs_feasible = TRUE; 01957 01958 int inputs_avail; 01959 01960 temp_pb = cur_pb; 01961 while(temp_pb && inputs_feasible) { 01962 inputs_avail = temp_pb->pb_stats.inputs_avail; 01963 if(inputs_avail == NOT_VALID) { 01964 inputs_avail = temp_pb->pb_graph_node->pb_type->num_input_pins; 01965 } 01966 01967 port = logical_block[iblk].model->outputs; 01968 while(port) { 01969 /* Outputs that connect to internal blocks free up an input pin. */ 01970 for(ipin = 0; ipin < port->size; ipin++) { 01971 output_net = logical_block[iblk].output_nets[port->index][ipin]; 01972 if (output_net != OPEN && temp_pb->pb_stats.num_pins_of_net_in_pb[output_net] != 0 && !is_clock[output_net]) 01973 inputs_avail++; 01974 } 01975 port = port->next; 01976 } 01977 01978 port = logical_block[iblk].model->inputs; 01979 while(port) { 01980 if(!port->is_clock) { 01981 /* Inputs that do not connect to an output pin of an internal block (including this one) require an input pin. */ 01982 for (ipin = 0; ipin < port->size; ipin++) { 01983 inet = logical_block[iblk].input_nets[port->index][ipin]; 01984 if (inet != OPEN && temp_pb->pb_stats.num_pins_of_net_in_pb[inet] == 0) { 01985 if(net_output_feeds_driving_block_input[inet] == 0) { 01986 inputs_avail--; 01987 } 01988 } 01989 } 01990 } 01991 port = port->next; 01992 } 01993 inputs_feasible = (inputs_avail >= 0); 01994 temp_pb = temp_pb->parent_pb; 01995 } 01996 if (outputs_clocks_and_models_feasible (packer_algorithm, iblk, is_clock, cur_pb) == FALSE) 01997 return (FALSE); 01998 01999 if (inputs_feasible) 02000 return (TRUE); 02001 else 02002 return (FALSE); 02003 } 02004 02005 /*****************************************/ 02006 /** This routine populates a list of feasible blocks outside the cluster, then returns the best block from that list 02007 * that is not currently in a cluster and satisfies the feasibility 02008 * function passed in as is_feasible. If there are no feasible 02009 * blocks it returns NO_CLUSTER. 02010 */ 02011 static int get_highest_gain_logical_block ( INP enum e_packer_algorithm packer_algorithm, 02012 INOUTP t_pb *cur_pb, 02013 INP boolean *is_clock, 02014 INP boolean (*is_feasible) (enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb), 02015 INP enum e_gain_type gain_mode) { 02016 02017 int i, j, iblk, index; 02018 int best_hillgain; 02019 float best_gain; 02020 02021 int best_block; 02022 02023 best_gain = (float)NOT_VALID + 1.0; 02024 best_hillgain=NOT_VALID+1; 02025 02026 best_block = NO_CLUSTER; 02027 02028 if(cur_pb->pb_stats.cur_marked_model == NOT_VALID) { 02029 /* Divide into two cases for speed only. */ 02030 /* Typical case: not too many blocks have been marked.
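	   (fewer than num_logical_blocks / MARKED_FRAC marked blocks, so only the
	   marked list is scanned; otherwise the else branch below examines every
	   logical block)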
*/ 02031 02032 if (cur_pb->pb_stats.num_marked_blocks < num_logical_blocks / MARKED_FRAC) { 02033 for (i=0;i<cur_pb->pb_stats.num_marked_blocks;i++) { 02034 iblk = cur_pb->pb_stats.marked_blocks[i]; 02035 if (logical_block[iblk].clb_index == NO_CLUSTER) { 02036 if (gain_mode != HILL_CLIMBING){ 02037 if (is_feasible (packer_algorithm, iblk, is_clock, cur_pb)) { 02038 add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.gain, cur_pb); 02039 if (cur_pb->pb_stats.gain[iblk] > best_gain) { 02040 best_gain = cur_pb->pb_stats.gain[iblk]; 02041 best_block = iblk; 02042 } 02043 } 02044 } 02045 else{ /*hill climbing*/ 02046 if (is_feasible (packer_algorithm, iblk, is_clock, cur_pb)) { 02047 add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.hillgain, cur_pb); 02048 if (cur_pb->pb_stats.hillgain[iblk] > best_hillgain) { 02049 best_hillgain = cur_pb->pb_stats.hillgain[iblk]; 02050 best_block = iblk; 02051 } 02052 } 02053 } 02054 } 02055 } 02056 } 02057 else { /* Some high fanout nets marked lots of blocks. */ 02058 for (iblk=0;iblk<num_logical_blocks;iblk++) { 02059 if (logical_block[iblk].clb_index == NO_CLUSTER) { 02060 if (gain_mode != HILL_CLIMBING){ 02061 if (is_feasible (packer_algorithm, iblk, is_clock, cur_pb)) { 02062 add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.gain, cur_pb); 02063 if (cur_pb->pb_stats.gain[iblk] > best_gain) { 02064 best_gain = cur_pb->pb_stats.gain[iblk]; 02065 best_block = iblk; 02066 } 02067 } 02068 } 02069 else{ /*hill climbing*/ 02070 if (is_feasible (packer_algorithm, iblk, is_clock, cur_pb)) { 02071 add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.hillgain, cur_pb); 02072 if (cur_pb->pb_stats.hillgain[iblk] > best_hillgain) { 02073 best_hillgain = cur_pb->pb_stats.hillgain[iblk]; 02074 best_block = iblk; 02075 } 02076 } 02077 } 02078 } 02079 } 02080 } 02081 02082 for(i = 0; i < cur_pb->pb_stats.num_marked_models; i++) { 02083 index = cur_pb->pb_stats.feasible_blocks[i][cur_pb->pb_stats.num_feasible_blocks[i] - 1]; 02084 if (gain_mode != HILL_CLIMBING) { 02085 if(cur_pb->pb_stats.gain[index] == cur_pb->pb_stats.gain[best_block]) { 02086 cur_pb->pb_stats.cur_marked_model = i; 02087 break; 02088 } 02089 } else { 02090 if(cur_pb->pb_stats.hillgain[index] == cur_pb->pb_stats.hillgain[best_block]) { 02091 cur_pb->pb_stats.cur_marked_model = i; 02092 break; 02093 } 02094 } 02095 } 02096 assert(cur_pb->pb_stats.num_marked_models == 0 || i < cur_pb->pb_stats.num_marked_models); 02097 } 02098 02099 if(cur_pb->pb_stats.cur_marked_model == NOT_VALID) { 02100 return NO_CLUSTER; 02101 } 02102 02103 for(i = 0; i < cur_pb->pb_stats.num_marked_models; i++) { 02104 for(j = 0; j < cur_pb->pb_stats.num_feasible_blocks[i]; j++) { 02105 if(cur_pb->pb_stats.num_feasible_blocks[cur_pb->pb_stats.cur_marked_model] != 0) { 02106 cur_pb->pb_stats.num_feasible_blocks[cur_pb->pb_stats.cur_marked_model]--; 02107 index = cur_pb->pb_stats.num_feasible_blocks[cur_pb->pb_stats.cur_marked_model]; 02108 iblk = cur_pb->pb_stats.feasible_blocks[cur_pb->pb_stats.cur_marked_model][index]; 02109 cur_pb->pb_stats.cur_marked_model = (cur_pb->pb_stats.cur_marked_model + 1) % cur_pb->pb_stats.num_marked_models; 02110 return iblk; 02111 } 02112 } 02113 } 02114 02115 return NO_CLUSTER; 02116 } 02117 02118 02119 /*****************************************/ 02120 /** Finds the vpack block with the greatest gain that satisfies the 02121 * input, clock and capacity constraints of a cluster that are 02122 * passed in. If no suitable vpack block is found it returns NO_CLUSTER.
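 * If allow_unrelated_clustering is TRUE and no marked block has any gain to
 * the cluster, the unclustered block with the most external inputs is
 * returned instead.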
02123 */ 02124 static int get_logical_block_for_cluster ( INP enum e_packer_algorithm packer_algorithm, 02125 INOUTP t_pb *cur_pb, 02126 INP boolean *is_clock, 02127 INP boolean allow_unrelated_clustering) { 02128 02129 int best_block; 02130 02131 /* If cannot pack into primitive, try packing into cluster */ 02132 02133 02134 best_block = get_highest_gain_logical_block (packer_algorithm, cur_pb, is_clock, inputs_outputs_models_and_clocks_feasible, 02135 NOT_HILL_CLIMBING); 02136 02137 /* If no blocks have any gain to the current cluster, the code above * 02138 * will not find anything. However, another logical_block with no inputs in * 02139 * common with the cluster may still be inserted into the cluster. */ 02140 02141 if (allow_unrelated_clustering) 02142 if (best_block == NO_CLUSTER) 02143 best_block = get_free_logical_block_with_most_ext_inputs_for_cluster (packer_algorithm, cur_pb); 02144 02145 return best_block; 02146 } 02147 02148 /*****************************************/ 02149 static void alloc_and_load_cluster_info (INP int num_clb, INOUTP t_block *clb) { 02150 02151 /* Loads all missing clustering info necessary to complete clustering. */ 02152 int i, j, i_clb, node_index, ipin, iclass; 02153 int inport, outport, clockport; 02154 02155 const t_pb_type * pb_type; 02156 t_pb *pb; 02157 02158 for(i_clb = 0; i_clb < num_clb; i_clb++) { 02159 rr_node = clb[i_clb].pb->rr_graph; 02160 pb_type = clb[i_clb].pb->pb_graph_node->pb_type; 02161 pb = clb[i_clb].pb; 02162 02163 clb[i_clb].nets = my_malloc(clb[i_clb].type->num_pins * sizeof(int)); 02164 for(i = 0; i < clb[i_clb].type->num_pins; i++) { 02165 clb[i_clb].nets[i] = OPEN; 02166 } 02167 02168 inport = outport = clockport = 0; 02169 ipin = 0; 02170 /* Assume top-level pb and clb share a one-to-one connection */ 02171 for(i = 0; i < pb_type->num_ports; i++) { 02172 if(!pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT) { 02173 for(j = 0; j < pb_type->ports[i].num_pins; j++) { 02174 iclass = clb[i_clb].type->pin_class[ipin]; 02175 assert(clb[i_clb].type->class_inf[iclass].type == RECEIVER); 02176 assert(!clb[i_clb].type->is_global_pin[ipin]); 02177 node_index = pb->pb_graph_node->input_pins[inport][j].pin_count_in_cluster; 02178 clb[i_clb].nets[ipin] = rr_node[node_index].net_num; 02179 ipin++; 02180 } 02181 inport++; 02182 } else if(pb_type->ports[i].type == OUT_PORT) { 02183 for(j = 0; j < pb_type->ports[i].num_pins; j++) { 02184 iclass = clb[i_clb].type->pin_class[ipin]; 02185 assert(clb[i_clb].type->class_inf[iclass].type == DRIVER); 02186 node_index = pb->pb_graph_node->output_pins[outport][j].pin_count_in_cluster; 02187 clb[i_clb].nets[ipin] = rr_node[node_index].net_num; 02188 ipin++; 02189 } 02190 outport++; 02191 } else { 02192 assert(pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT); 02193 for(j = 0; j < pb_type->ports[i].num_pins; j++) { 02194 iclass = clb[i_clb].type->pin_class[ipin]; 02195 assert(clb[i_clb].type->class_inf[iclass].type == RECEIVER); 02196 assert(clb[i_clb].type->is_global_pin[ipin]); 02197 node_index = pb->pb_graph_node->clock_pins[clockport][j].pin_count_in_cluster; 02198 clb[i_clb].nets[ipin] = rr_node[node_index].net_num; 02199 ipin++; 02200 } 02201 clockport++; 02202 } 02203 } 02204 } 02205 } 02206 02207 /* TODO: Add more error checking, too light */ 02208 /*****************************************/ 02209 static void check_clustering (int num_clb, t_block *clb, boolean *is_clock) { 02210 int i; 02211 t_pb * cur_pb; 02212 boolean *blocks_checked; 02213 02214 blocks_checked 
= my_calloc(num_logical_blocks, sizeof(boolean)); 02215 02216 /* 02217 * Check that each logical block connects to one primitive and that the primitive links up to the parent clb 02218 */ 02219 for(i = 0; i < num_blocks; i++) { 02220 if(logical_block[i].pb->logical_block != i) { 02221 printf(ERRTAG "pb %s does not contain logical block %s but logical block %s #%d links to pb\n", 02222 logical_block[i].pb->name, logical_block[i].name, logical_block[i].name, i); 02223 exit(1); 02224 } 02225 cur_pb = logical_block[i].pb; 02226 assert(strcmp(cur_pb->name, logical_block[i].name) == 0); 02227 while(cur_pb->parent_pb) { 02228 cur_pb = cur_pb->parent_pb; 02229 assert(cur_pb->name); 02230 } 02231 if(cur_pb != clb[num_clb].pb) { 02232 printf(ERRTAG "CLB %s does not match CLB contained by pb %s\n", 02233 cur_pb->name, logical_block[i].pb->name); 02234 exit(1); 02235 } 02236 } 02237 02238 /* Check that I do not have spurious links in children pbs */ 02239 for(i = 0; i < num_clb; i++) { 02240 check_cluster_logical_blocks(clb[i].pb, blocks_checked); 02241 } 02242 02243 for(i = 0; i < num_logical_blocks; i++) { 02244 if(blocks_checked[i] == FALSE) { 02245 printf(ERRTAG "Logical block %s #%d not found in any cluster\n", logical_block[i].name, i); 02246 exit(1); 02247 } 02248 } 02249 02250 free(blocks_checked); 02251 } 02252 02253 /* TODO: May want to check that all logical blocks are actually reached (low priority, nice to have) */ 02254 static void check_cluster_logical_blocks (t_pb *pb, boolean *blocks_checked) { 02255 int i, j; 02256 const t_pb_type *pb_type; 02257 boolean has_child; 02258 02259 has_child = FALSE; 02260 pb_type = pb->pb_graph_node->pb_type; 02261 if(pb_type->num_modes == 0) { 02262 /* primitive */ 02263 if(pb->logical_block != OPEN) { 02264 if(blocks_checked[pb->logical_block] != FALSE) { 02265 printf(ERRTAG "pb %s contains logical block %s #%d but logical block is already contained in another pb\n", 02266 pb->name, logical_block[pb->logical_block].name, pb->logical_block); 02267 exit(1); 02268 } 02269 blocks_checked[pb->logical_block] = TRUE; 02270 if(pb != logical_block[pb->logical_block].pb) { 02271 printf(ERRTAG "pb %s contains logical block %s #%d but logical block does not link to pb\n", 02272 pb->name, logical_block[pb->logical_block].name, pb->logical_block); 02273 exit(1); 02274 } 02275 } 02276 } else { 02277 /* this is a container pb, all container pbs must contain children */ 02278 for(i = 0; i < pb_type->modes[pb->mode].num_pb_type_children; i++) { 02279 for(j = 0; j < pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { 02280 if(pb->child_pbs[i] != NULL) { 02281 if(pb->child_pbs[i][j].name != NULL) { 02282 has_child = TRUE; 02283 check_cluster_logical_blocks(&pb->child_pbs[i][j], blocks_checked); 02284 } 02285 } 02286 } 02287 } 02288 assert(has_child); 02289 } 02290 } 02291 02292 /** calculate_timing_information must be called before this, or this function 02293 * will return garbage 02294 */ 02295 static int get_most_critical_unclustered_block(void) { 02296 /*indexofcrit is a global variable that is reset to 0 each time a * 02297 *timing analysis is done (reset in sort_blocks_by_criticality) */ 02298 02299 int critidx, blkidx; 02300 02301 for (critidx=indexofcrit; critidx<num_logical_blocks; critidx++) { 02302 blkidx=critindexarray[critidx]; 02303 if (logical_block[blkidx].clb_index == NO_CLUSTER) { 02304 indexofcrit=critidx+1; 02305 return(blkidx); 02306 } 02307 } 02308 02309 /* if it makes it to here, there are no more blocks available */ 02310 return(NO_CLUSTER);
02311 } 02312 02313 static void free_cb (t_pb *pb, int max_models) { 02314 const t_pb_type * pb_type; 02315 int i, total_nodes; 02316 02317 pb_type = pb->pb_graph_node->pb_type; 02318 02319 total_nodes = pb->pb_graph_node->total_pb_pins + pb_type->num_input_pins + pb_type->num_output_pins + pb_type->num_clock_pins; 02320 02321 for(i = 0; i < total_nodes; i++) { 02322 if(pb->rr_graph[i].edges != NULL) { 02323 free(pb->rr_graph[i].edges); 02324 } 02325 if(pb->rr_graph[i].switches != NULL) { 02326 free(pb->rr_graph[i].switches); 02327 } 02328 } 02329 free(pb->rr_graph); 02330 free_pb(pb, max_models); 02331 } 02332 02333 02334 static void free_pb (t_pb *pb, int max_models) { 02335 const t_pb_type * pb_type; 02336 int i, j, mode; 02337 02338 pb_type = pb->pb_graph_node->pb_type; 02339 02340 if(pb_type->blif_model == NULL) { 02341 mode = pb->mode; 02342 for(i = 0; i < pb_type->modes[mode].num_pb_type_children && pb->child_pbs != NULL; i++) { 02343 for(j = 0; j < pb_type->modes[mode].pb_type_children[i].num_pb && pb->child_pbs[i] != NULL; j++) { 02344 if(pb->child_pbs[i][j].name != NULL) { 02345 free_pb(&pb->child_pbs[i][j], max_models); 02346 } 02347 } 02348 if(pb->child_pbs[i]) 02349 free(pb->child_pbs[i]); 02350 } 02351 if(pb->child_pbs) 02352 free(pb->child_pbs); 02353 free(pb->name); 02354 } else { 02355 /* Primitive */ 02356 if(pb->name) 02357 free(pb->name); 02358 pb->name = NULL; 02359 } 02360 } 02361 02362 static void free_pb_stats(t_pb_stats pb_stats, int max_models) { 02363 int i; 02364 02365 if(pb_stats.gain != NULL) { 02366 free(pb_stats.gain); 02367 free(pb_stats.lengthgain); 02368 free(pb_stats.sharinggain); 02369 free(pb_stats.hillgain); 02370 free(pb_stats.connectiongain); 02371 free(pb_stats.num_feasible_blocks); 02372 02373 for(i = 0; i < max_models; i ++) { 02374 free(pb_stats.feasible_blocks[i]); 02375 } 02376 free(pb_stats.feasible_blocks); 02377 02378 free(pb_stats.num_pins_of_net_in_pb); 02379 free(pb_stats.net_output_in_pb); 02380 free(pb_stats.marked_nets); 02381 free(pb_stats.marked_blocks); 02382 02383 pb_stats.gain = NULL; 02384 } 02385 } 02386
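/* Note on free_pb_stats: pb_stats is passed by value, so the pb_stats.gain = NULL
 * assignment above clears only the local copy. The if (pb_stats.gain != NULL) guard
 * therefore protects pbs whose stats were never allocated (gain is NULL from calloc),
 * but it does not prevent a double free if this routine is called twice on the same pb.
 * A minimal sketch of the allocate/free pairing, following the calls made in
 * start_new_cluster's failure path elsewhere in this file (the legalizer setup and
 * teardown are omitted here for brevity):
 *
 *   t_pb *pb = my_calloc(1, sizeof(t_pb));
 *   alloc_and_load_pb_stats(pb, num_models, max_cluster_size, max_primitive_inputs);
 *   ...attempt to pack logical blocks into pb...
 *   free_pb_stats(pb->pb_stats, num_models);  // release per-cluster bookkeeping once
 *   free(pb);
 */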