VPR-6.0

vpr/SRC/pack/cluster.c

Go to the documentation of this file.
00001 /**
00002  * @file
00003  *
00004  * Main clustering algorithm
00005  * Author(s): Vaughn Betz (first revision - VPack), Alexander Marquardt (second revision - T-VPack), Jason Luu (third revision - AAPack)
00006  * June 8, 2011
00007  */
00008 
00009 #include <stdio.h>
00010 #include <assert.h>
00011 #include <string.h>
00012 
00013 #include "util.h"
00014 #include "vpr_types.h"
00015 #include "globals.h"
00016 #include "cluster.h"
00017 #include "heapsort.h"
00018 #include "output_clustering.h"
00019 #include "output_blif.h"
00020 #include "SetupGrid.h"
00021 #include "read_xml_arch_file.h"
00022 #include "cluster_legality.h"
00023 #include "path_delay2.h"
00024 #include "path_delay.h"
00025 
00026 #define AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE 30      /**< This value is used to determine the max size of the priority queue for candidates that pass the early filter legality test but not the more detailed routing test */
00027 
00028 #define SCALE_NUM_PATHS 1e-2     /**< this value is used as a multiplier to assign a    
00029                                   * slightly higher criticality value to nets that    
00030                                   * affect a large number of critical paths versus    
00031                                   * nets that do not have such a broad effect.        
00032                                   * Note that this multiplier is intentionally very    
00033                                   * small compared to the total criticality because   
00034                                   * we want to make sure that vpack_net criticality is      
00035                                   * primarily determined by slacks, with this acting  
00036                                   * only as a tie-breaker between otherwise equal nets
00037                                   */
00038 #define SCALE_DISTANCE_VAL 1e-4  /**< this value is used as a multiplier to assign a    
00039                                   * slightly higher criticality value to nets that    
00040                                   * are otherwise equal but one is  farther           
00041                                   * from sources (the farther one being made slightly 
00042                                   * more critical)                                    
00043                                   */
00044 
00045 
00046 enum e_gain_update {GAIN, NO_GAIN};
00047 enum e_feasibility {FEASIBLE, INFEASIBLE};
00048 enum e_gain_type {HILL_CLIMBING, NOT_HILL_CLIMBING};
00049 enum e_removal_policy {REMOVE_CLUSTERED, LEAVE_CLUSTERED};
00050 enum e_net_relation_to_clustered_block {INPUT, OUTPUT};
00051 
00052 /** Linked list structure.  Stores one integer (iblk). */
00053 struct s_ilink {int iblk; struct s_ilink *next;};
00054 
00055 
00056 /** 1/MARKED_FRAC is the fraction of nets or blocks that must be
00057  * marked in order for the brute force (go through the whole    
00058  * data structure linearly) gain update etc. code to be used.   
00059  * This is done for speed only; make MARKED_FRAC whatever       
00060  * number speeds the code up most.                              
00061  */
00062 #define MARKED_FRAC 2   
00063 
00064 /** Keeps a linked list of the unclustered blocks to speed up looking for 
00065  * unclustered blocks with a certain number of *external* inputs.        
00066  * [0..lut_size].  Unclustered_list_head[i] points to the head of the   
00067  * list of blocks with i inputs to be hooked up via external interconnect. 
00068  */
00069 static struct s_ilink *unclustered_list_head;
00070 int unclustered_list_head_size;
00071 static struct s_ilink *memory_pool; /**< Declared here so I can free easily.*/
00072 
00073 /** Does the logical_block that drives the output of this vpack_net also appear as a   
00074  * receiver (input) pin of the vpack_net?  [0..num_logical_nets-1].  If so, then by how much? This is used     
00075  * in the gain routines to avoid double counting the connections from   
00076  * the current cluster to other blocks (hence yielding better           
00077  * clusterings).  The only time a logical_block should connect to the same vpack_net  
00078  * twice is when one connection is an output and the other is an input, 
00079  * so this should take care of all multiple connections.                
00080  */
00081 static int *net_output_feeds_driving_block_input;
00082 
00083 static int hack_frac_lut_no_legality;
00084 static int hack_force_safe_latch;
00085 
00086 static int indexofcrit; /**< index of next most timing critical block */
00087 
00088 /**@{*/
00089 /** Timing information for blocks */
00090 static float *criticality = NULL;
00091 static int *critindexarray = NULL;
00092 /**@}*/
00093 
00094 static float **net_pin_backward_criticality = NULL;
00095 static float **net_pin_forward_criticality = NULL;
00096 
00097 
00098 
00099 /*****************************************/
00100 /*local functions*/
00101 /*****************************************/
00102 
00103 static int num_ext_inputs (int iblk);
00104 static void check_clocks (boolean *is_clock);
00105 
00106 static int get_max_cluster_size(t_pb_type *pb_type);
00107 static int get_max_primitive_inputs(t_pb_type *pb_type);
00108 static int get_max_pb_depth(t_pb_type *pb_type);
00109 
00110 #if 0
00111 static void check_for_duplicate_inputs ();
00112 #endif 
00113 static boolean is_logical_blk_in_pb(int iblk, t_pb *pb);
00114 
00115 static void add_blk_to_pb_stats_candidates(int iblk, float *gain, t_pb *pb);
00116 
00117 static void alloc_and_init_clustering (boolean global_clocks, float alpha, float beta, int max_cluster_size, int max_primitive_inputs, int max_pb_depth, int max_models);
00118 static void free_pb_stats_recursive (t_pb *pb, int max_models);
00119 static boolean outputs_clocks_and_models_feasible (enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb);
00120 static boolean models_feasible(enum e_packer_algorithm packer_algorithm, int iblk, const t_pb_type *cur_pb_type, t_pb *cur_pb, int mode);
00121 static boolean primitive_feasible(int iblk, t_pb *cur_pb);
00122 static boolean primitive_type_feasible(int iblk, const t_pb_type *cur_pb_type, t_pb *memory_class_pb, int sibling_memory_blk);
00123 
00124 
00125 static int get_logical_block_by_num_ext_inputs (INP enum e_packer_algorithm packer_algorithm,
00126                                                                                 INOUTP t_pb *cur_pb,
00127                                                                                 INP int ext_inps, 
00128                                                                                 INP enum e_removal_policy remove_flag);
00129 
00130 static int get_free_logical_block_with_most_ext_inputs_for_cluster (INP enum e_packer_algorithm packer_algorithm,
00131                                                                                                                                         INOUTP t_pb *cur_pb);
00132 
00133 static int get_seed_logical_block_with_most_ext_inputs (int max_primitive_inputs);
00134 static void alloc_and_load_pb_stats (t_pb *pb, int max_models, int max_cluster_size, int max_primitive_inputs);
00135 
00136 static enum e_block_pack_status alloc_and_load_pb(INP enum e_packer_algorithm packer_algorithm, INP t_pb_graph_node *pb_graph_node, INP int iblock, INOUTP t_pb * pb, INP int max_models, INP int max_cluster_size, INP int max_primitive_inputs);
00137 
00138 static void update_connection_gain_values (int inet, int clustered_block, t_pb * cur_pb,
00139                                                 enum e_net_relation_to_clustered_block net_relation_to_clustered_block);
00140 
00141 static void update_length_gain_values (int inet, int clustered_block, t_pb* cur_pb,
00142                                                 enum e_net_relation_to_clustered_block net_relation_to_clustered_block);
00143 
00144 static void mark_and_update_partial_gain (int inet, 
00145                                                                                   enum e_gain_update gain_flag, 
00146                                                                                   int clustered_block, 
00147                                                                                   int port_on_clustered_block, 
00148                                                                                   int pin_on_clustered_block,
00149                                                                                   boolean timing_driven, 
00150                                                                                   boolean connection_driven,
00151                                                                                   enum e_net_relation_to_clustered_block net_relation_to_clustered_block);
00152 
00153 static void update_total_gain(float alpha, float beta, boolean timing_driven, boolean 
00154                               connection_driven, boolean global_clocks, t_pb *pb);
00155 
00156 static void update_cluster_stats (      INP int new_blk,
00157                                                                         INP int clb_index,
00158                                                                         INP boolean *is_clock, 
00159                                                                         INP boolean global_clocks, 
00160                                                                         INP float alpha, 
00161                                                                         INP float beta,
00162                                                                         INP boolean timing_driven, 
00163                                                                         INP boolean connection_driven);
00164 
00165 static void start_new_cluster(INP enum e_packer_algorithm packer_algorithm,
00166                                                           INP const t_arch * arch,
00167                                                           INOUTP t_block *new_cluster,
00168                                                           INP int clb_index,
00169                                                           INP int istart,
00170                                                                 INP float aspect,
00171                                                                 INOUTP int *num_used_instances_type, 
00172                                                                 INOUTP int *num_instances_type,
00173                                                                 INP int num_models,
00174                                                                 INP int max_cluster_size,
00175                                                                 INP int max_primitive_inputs);
00176 
00177 static boolean inputs_outputs_models_and_clocks_feasible (INP enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb);
00178 
00179 static int get_highest_gain_logical_block (INP enum e_packer_algorithm packer_algorithm,
00180                                                                                    INOUTP t_pb *cur_pb,
00181                                                                         INP boolean *is_clock, 
00182                                                                         INP boolean (*is_feasible) 
00183                                                                         (enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb), 
00184                                                                         INP enum e_gain_type gain_mode);
00185 
00186 static int get_logical_block_for_cluster (INP enum e_packer_algorithm packer_algorithm,
00187                                                                                   INP t_pb *cur_pb,
00188                                                                                  INP boolean *is_clock,
00189                                                                                  INP boolean allow_unrelated_clustering);
00190 
00191 static int get_free_logical_block_with_fewest_ext_inputs (INP enum e_packer_algorithm packer_algorithm,
00192                                                                                                                   INOUTP t_pb *cur_pb);
00193 
00194 static int get_most_feasible_logical_block (INOUTP t_pb *cur_pb);
00195 
00196 static void alloc_and_load_cluster_info (INP int num_clb, INOUTP t_block *clb);
00197 
00198 static void check_clustering (int num_clb, t_block *clb, boolean *is_clock);
00199 
00200 static void check_cluster_logical_blocks (t_pb *pb, boolean *blocks_checked);
00201 
00202 static int get_most_critical_unclustered_block(void);
00203 
00204 static void free_cb (t_pb *pb, int max_models);
00205 static void free_pb_stats(t_pb_stats pb_stats, int max_models);
00206 static void free_pb (t_pb *pb, int max_models);
00207 
00208 /*****************************************/
00209 /*globally accessable function*/
00210 
00211 /** Does the actual work of clustering multiple VPACK_LUT+FF logic blocks 
00212  * into clusters.                                                  
00213  *
00214  * note: I allow timing_driven and connection_driven to be simultaneously true
00215  * in this case, connection_driven is responsible for all clustering decisions
00216  * but timing analysis is still enabled (useful for viewing the constant delay
00217  * results)
00218  * Algorithm employed:
00219  * 1.  Find type that can legally hold block and create cluster with pb info
00220  * 2.  Populate started cluster
00221  * 3.  Repeat 1 until no more blocks need to be clustered
00222  */
00223 void do_clustering (const t_arch *arch, int num_models, boolean global_clocks,
00224        boolean *is_clock, boolean hill_climbing_flag, 
00225        char *out_fname, boolean timing_driven, 
00226        enum e_cluster_seed cluster_seed_type, float alpha, float beta,
00227        int recompute_timing_after, float block_delay, 
00228        float intra_cluster_net_delay, float inter_cluster_net_delay,
00229            float aspect,
00230        boolean allow_unrelated_clustering, 
00231        boolean allow_early_exit, boolean connection_driven, 
00232            enum e_packer_algorithm packer_algorithm,
00233            boolean hack_no_legal_frac_lut, boolean hack_safe_latch){
00234 
00235 
00236  int istart, i, j, k, m, iblk;
00237  int blocks_since_last_analysis;
00238  int next_blk, prev_blk;
00239 
00240  int num_blocks_hill_added;
00241  int num_logical_blocks_clustered;
00242  int max_cluster_size, cur_cluster_size, max_primitive_inputs, cur_primitive_inputs, max_pb_depth, cur_pb_depth;
00243 
00244  int *hill_climbing_inputs_avail;
00245  
00246  t_block *clb; 
00247  t_pb *cur_pb;
00248  int num_clb;
00249 
00250  boolean critical_path_minimized;
00251  boolean early_exit;
00252  
00253  enum e_block_pack_status block_pack_status;
00254 
00255  int *num_used_instances_type, *num_instances_type; /* [0..num_types] Holds array for total number of each cluster_type available */
00256  float **net_slack = NULL;
00257  float num_paths_scaling, distance_scaling;
00258 
00259  float crit;
00260 
00261  hack_frac_lut_no_legality = hack_no_legal_frac_lut;
00262  hack_force_safe_latch = hack_safe_latch;
00263 
00264  /* TODO: This is memory inefficient, fix if causes problems */
00265  clb = my_calloc(num_logical_blocks,sizeof(t_block));
00266  num_clb = 0;
00267  istart = NO_CLUSTER;
00268 
00269  /* determine bound on cluster size and primitive input size */
00270  max_cluster_size = 0;
00271  max_primitive_inputs = 0;
00272  max_pb_depth = 0;
00273 
00274  indexofcrit = 0;
00275 
00276  for(i = 0; i < num_types; i++) {
00277          if(EMPTY_TYPE == &type_descriptors[i]) 
00278                  continue;
00279          cur_cluster_size = get_max_cluster_size(type_descriptors[i].pb_type);
00280          cur_primitive_inputs = get_max_primitive_inputs(type_descriptors[i].pb_type);
00281          cur_pb_depth = get_max_pb_depth(type_descriptors[i].pb_type);
00282          if(cur_cluster_size > max_cluster_size) {
00283                  max_cluster_size = cur_cluster_size;
00284          }
00285          if(cur_primitive_inputs > max_primitive_inputs) {
00286                  max_primitive_inputs = cur_primitive_inputs;
00287          }
00288          if(cur_pb_depth > max_pb_depth) {
00289                  max_pb_depth = cur_pb_depth;
00290          }
00291  }
00292 
00293  if(hill_climbing_flag) {
00294          hill_climbing_inputs_avail = (int *) my_calloc (max_cluster_size + 1, sizeof(int));
00295  } else {
00296          hill_climbing_inputs_avail = NULL; /* if used, die hard */
00297  }
00298 
00299  /* TODO: make better estimate for nx and ny */
00300  nx = ny = 1;
00301 
00302  check_clocks (is_clock);
00303 #if 0
00304  check_for_duplicate_inputs ();
00305 #endif
00306  alloc_and_init_clustering (global_clocks, alpha, beta, max_cluster_size, max_primitive_inputs, max_pb_depth, num_models);
00307 
00308  blocks_since_last_analysis = 0;
00309  critical_path_minimized = FALSE;
00310  early_exit = FALSE;
00311  num_logical_blocks_clustered = 0;
00312  num_blocks_hill_added = 0;
00313  num_used_instances_type = (int*)my_calloc (num_types, sizeof (int));
00314  num_instances_type = (int*)my_calloc (num_types, sizeof (int));
00315  
00316  assert(max_cluster_size < MAX_SHORT); /* Limit maximum number of elements for each cluster */
00317  
00318  if (timing_driven){
00319          net_slack = alloc_and_load_pre_packing_timing_graph(block_delay, inter_cluster_net_delay, arch->models);
00320         load_net_slack(net_slack, 0);
00321         
00322         criticality=(float*)my_calloc(num_logical_blocks,sizeof(float));
00323 
00324         critindexarray=(int*)my_malloc(num_logical_blocks*sizeof(int));
00325 
00326         for(i = 0; i < num_logical_blocks; i++) {
00327                 critindexarray[i] = i;
00328         }
00329 
00330         /* Calculate criticality based on slacks and tie breakers (# paths, distance from source) */
00331         for(i = 0; i < num_tnodes; i++) {
00332                 iblk = tnode[i].block;
00333                 num_paths_scaling = SCALE_NUM_PATHS * (float)tnode[i].normalized_total_critical_paths;
00334                 distance_scaling = SCALE_DISTANCE_VAL * (float)tnode[i].normalized_T_arr;
00335                 crit = (1 - tnode[i].normalized_slack) + num_paths_scaling + distance_scaling;
00336                 if(criticality[iblk] < crit) {
00337                         criticality[iblk] = crit;
00338                 }
00339         }
00340 
00341         net_pin_backward_criticality = (float**)my_calloc(num_logical_nets, sizeof(float*));
00342         net_pin_forward_criticality = (float**)my_calloc(num_logical_nets, sizeof(float*));
00343         for(i = 0; i < num_logical_nets; i++) {
00344                 net_pin_backward_criticality[i] = (float *)my_calloc(vpack_net[i].num_sinks + 1, sizeof(float));
00345                 net_pin_forward_criticality[i] = (float *)my_calloc(vpack_net[i].num_sinks + 1, sizeof(float));
00346                 for(j = 0; j <= vpack_net[i].num_sinks; j++) {
00347                         net_pin_backward_criticality[i][j] = criticality[vpack_net[i].node_block[j]];
00348                         net_pin_forward_criticality[i][j] = criticality[vpack_net[i].node_block[0]];
00349                 }
00350         }
00351 
00352         heapsort(critindexarray, criticality, num_logical_blocks, 1);
00353 
00354         /*print_critical_path("clustering_critical_path.echo");*/
00355 
00356 
00357    if (cluster_seed_type == VPACK_TIMING){
00358      istart=get_most_critical_unclustered_block();
00359    }
00360    else {/*max input seed*/
00361         printf("get_seed_logical_block_with_most_ext_inputs\n");
00362            istart = get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
00363    }
00364  }
00365  else /*cluster seed is max input (since there is no timing information)*/{
00366          istart = get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
00367  }
00368 
00369  while (istart != NO_CLUSTER) {
00370 
00371         reset_legalizer_for_cluster(&clb[num_clb]);
00372         
00373         /* start a new cluster and reset all stats */
00374         start_new_cluster(packer_algorithm, arch, &clb[num_clb], num_clb, istart, aspect, num_used_instances_type, num_instances_type, num_models, max_cluster_size, max_primitive_inputs);
00375         if(logical_block[istart].type != VPACK_INPAD && logical_block[istart].type != VPACK_OUTPAD) 
00376         {
00377                 printf("Complex Block %d: %s type %s\n", num_clb, clb[num_clb].name, clb[num_clb].type->name);
00378                 fflush(stdout);
00379         }
00380         update_cluster_stats (istart, num_clb, is_clock, global_clocks, alpha, beta, timing_driven, connection_driven);
00381         num_clb++;
00382          
00383 
00384     num_logical_blocks_clustered ++;
00385 
00386     if (timing_driven && !early_exit) {
00387       blocks_since_last_analysis++;
00388       /*it doesn't make sense to do a timing analysis here since there*
00389        *is only one logical_block clustered it would not change anything      */
00390     }
00391          
00392         /* First try to pack primitive into cluster, select next block based on closest open sibling if available */
00393         cur_pb = logical_block[istart].pb->parent_pb;
00394         prev_blk = NO_CLUSTER;
00395 
00396         while (cur_pb) {
00397                 
00398                 if(packer_algorithm == PACK_BRUTE_FORCE) { /* if using brute force approach, look at all possible free blocks in cluster */
00399                         cur_pb = clb[num_clb - 1].pb;
00400                 }
00401 
00402                 next_blk = get_logical_block_for_cluster(packer_algorithm,
00403                                                                                 cur_pb, 
00404                                                                                  is_clock,
00405                                                                                  allow_unrelated_clustering);
00406 
00407                 block_pack_status = alloc_and_load_pb(packer_algorithm, cur_pb->pb_graph_node, next_blk, cur_pb, num_models, max_cluster_size, max_primitive_inputs); 
00408                 if(block_pack_status != BLK_PASSED) {
00409                         if(next_blk != NO_CLUSTER && prev_blk != next_blk) {
00410                                 if(block_pack_status == BLK_FAILED_ROUTE) {
00411                                         #ifdef DEBUG_FAILED_PACKING_CANDIDATES
00412                                                 printf("NO_ROUTE:%s#%d|%s ", logical_block[next_blk].name, next_blk, logical_block[next_blk].model->name); 
00413                                         #else
00414                                                 printf(".");
00415                                         #endif
00416                                 } else {
00417                                         #ifdef DEBUG_FAILED_PACKING_CANDIDATES
00418                                                 printf("FAILED_CHECK:%s#%d|%s check %d", logical_block[next_blk].name, next_blk, logical_block[next_blk].model->name, block_pack_status); 
00419                                         #else
00420                                                 printf(".");
00421                                         #endif
00422                                 }
00423                                 prev_blk = next_blk;
00424                         } else {
00425                                 prev_blk = NO_CLUSTER;
00426                                 cur_pb = cur_pb->parent_pb;
00427                         }
00428                         continue;
00429                 } else {
00430                         /* Continue packing by filling smallest cluster */
00431                         if(hack_frac_lut_no_legality) {
00432                                 logical_block[next_blk].clb_index = num_clb - 1;
00433                         }
00434                         cur_pb = logical_block[next_blk].pb->parent_pb;
00435                         prev_blk = NO_CLUSTER;
00436                         #ifdef DEBUG_FAILED_PACKING_CANDIDATES                  
00437                                 printf("PASSED:%s#%d ", logical_block[next_blk].name, next_blk); 
00438                         #else
00439                                 printf(".");
00440                         #endif
00441                 }
00442                 update_cluster_stats (next_blk, num_clb - 1, is_clock, global_clocks, 
00443                                                 alpha, beta, timing_driven, connection_driven);
00444 
00445                 num_logical_blocks_clustered ++;
00446 
00447                 if (timing_driven && !early_exit) {
00448                 blocks_since_last_analysis++; /* historically, timing slacks were recomputed after X number of blocks were packed, but this doesn't significantly alter results so I (jluu) did not port the code */
00449         
00450       }
00451     }
00452         if(logical_block[istart].type != VPACK_INPAD && logical_block[istart].type != VPACK_OUTPAD) 
00453         {
00454                 printf("\n");
00455         }
00456 
00457         save_cluster_solution();
00458         if(hack_frac_lut_no_legality) {
00459                 k = 0;
00460                 for(i = 0; i < clb[num_clb-1].pb->pb_graph_node->num_input_ports; i++) {
00461                         for(j = 0; j < clb[num_clb-1].pb->pb_graph_node->num_input_pins[i]; j++) {
00462                                 while(k < num_logical_nets) { 
00463                                         if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && !vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
00464                                         {
00465                                                 rr_node[clb[num_clb-1].pb->pb_graph_node->input_pins[i][j].pin_count_in_cluster].net_num = k;
00466                                                 k++; 
00467                                                 break;
00468                                         }
00469                                         k++; 
00470                                 }               
00471                         }
00472                 }
00473                 /* check if nets are overused */
00474                 while(k < num_logical_nets) { 
00475                         if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && !vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
00476                         {
00477                                 printf(ERRTAG "Too many input nets used in cluster %s\n", clb[num_clb-1].name);
00478                                 assert(0);
00479                         }
00480                         k++; 
00481                 }                               
00482                 k = 0;
00483                 for(i = 0; i < clb[num_clb-1].pb->pb_graph_node->num_output_ports; i++) {
00484                         for(j = 0; j < clb[num_clb-1].pb->pb_graph_node->num_output_pins[i]; j++) {
00485                                 while(k < num_logical_nets) { 
00486                                         for(m = 0; m <= vpack_net[k].num_sinks; m++) {
00487                                                 if(logical_block[vpack_net[k].node_block[m]].clb_index != num_clb - 1) {
00488                                                         break;
00489                                                 }
00490                                         }
00491                                         if(clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && (clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] < vpack_net[k].num_sinks + 1))
00492                                         {
00493                                                 rr_node[clb[num_clb-1].pb->pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num = k;
00494                                                 k++; 
00495                                                 break;
00496                                         }
00497                                         k++; 
00498                                 }
00499                         }
00500                 }
00501                 while(k < num_logical_nets) { 
00502                         if(clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && (clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] < vpack_net[k].num_sinks + 1))
00503                         {
00504                                 printf(ERRTAG "Too many output nets used in cluster %s\n", clb[num_clb-1].name);
00505                                 assert(0);
00506                         }
00507                         k++; 
00508                 }               
00509                 k = 0;
00510                 for(i = 0; i < clb[num_clb-1].pb->pb_graph_node->num_clock_ports; i++) {
00511                         for(j = 0; j < clb[num_clb-1].pb->pb_graph_node->num_clock_pins[i]; j++) {
00512                                 while(k < num_logical_nets) { 
00513                                         if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
00514                                         {
00515                                                 rr_node[clb[num_clb-1].pb->pb_graph_node->clock_pins[i][j].pin_count_in_cluster].net_num = k;
00516                                                 k++; 
00517                                                 break;
00518                                         }
00519                                         k++; 
00520                                 }
00521                         }
00522                 }       
00523                 while(k < num_logical_nets) { 
00524                         if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
00525                         {
00526                                 printf(ERRTAG "Too many clock nets used in cluster %s\n", clb[num_clb-1].name);
00527                                 assert(0);
00528                         }
00529                         k++; 
00530                 }               
00531         }
00532 
00533     if (timing_driven){
00534       if (num_blocks_hill_added > 0  && !early_exit) {
00535                 blocks_since_last_analysis += num_blocks_hill_added;
00536       }
00537 
00538       if (cluster_seed_type == VPACK_TIMING){
00539         istart=get_most_critical_unclustered_block();
00540       }
00541       else { /*max input seed*/
00542                 istart = get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
00543       }
00544     }
00545     else /*cluster seed is max input (since there is no timing information)*/
00546       istart=get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
00547 
00548     free_pb_stats_recursive (clb[num_clb-1].pb, num_models);
00549  }
00550 
00551  if (timing_driven){
00552    free_timing_graph(net_slack);
00553  }
00554 
00555  free_cluster_legality_checker();
00556 
00557  alloc_and_load_cluster_info (num_clb, clb); 
00558 
00559  check_clustering (num_clb, clb, is_clock);
00560 
00561 
00562  output_clustering(     clb, num_clb, global_clocks, is_clock, out_fname, FALSE);
00563 #ifdef DUMP_BLIF_ECHO
00564  if(!hack_frac_lut_no_legality) /* must have routing graph before dumping blif */
00565         output_blif(    clb, num_clb, global_clocks, is_clock, "post_packing_blif.echo", FALSE);
00566 #endif
00567 
00568  if(hill_climbing_flag)
00569  {
00570          free (hill_climbing_inputs_avail);
00571  }
00572 
00573  for(i = 0; i < num_clb; i++) {
00574          free_cb(clb[i].pb, num_models);
00575         free(clb[i].name);
00576         free(clb[i].nets);
00577         free(clb[i].pb);
00578  }
00579  free(clb);
00580 
00581  free (num_used_instances_type);
00582  free (num_instances_type);
00583  free (unclustered_list_head);
00584  free (memory_pool); 
00585  free (net_output_feeds_driving_block_input);
00586 
00587 }
00588 
00589 /** Returns the number of input pins on this logical_block that must be hooked 
00590  * up through external interconnect.  That is, the number of input   
00591  * pins used - the number which connect (internally) to the outputs.   
00592  */
00593 static int num_ext_inputs (int iblk) {
00594 
00595  int ext_inps, output_net, ipin, opin;
00596 
00597  t_model_ports *port, *out_port;
00598  
00599  /* TODO: process to get ext_inps is slow, should cache in lookup table */
00600  ext_inps = 0;
00601  port = logical_block[iblk].model->inputs;
00602  while(port) {
00603          if(port->is_clock == FALSE) {
00604                  for(ipin = 0; ipin < port->size; ipin++) {
00605                          if(logical_block[iblk].input_nets[port->index][ipin] != OPEN) {
00606                                  ext_inps++;
00607                          }
00608                          out_port = logical_block[iblk].model->outputs;
00609                          while(out_port) {
00610                                  for(opin = 0; opin < out_port->size; opin++) {
00611                                          output_net = logical_block[iblk].output_nets[out_port->index][opin];
00612                                          if(output_net == OPEN)
00613                                                 continue;
00614                                         /* TODO: I could speed things up a bit by computing the number of inputs *
00615                                         * and number of external inputs for each logic logical_block at the start of   *
00616                                         * clustering and storing them in arrays.  Look into if speed is a      *
00617                                         * problem.                                                             */
00618 
00619                                          if (logical_block[iblk].input_nets[port->index][ipin] == output_net) {
00620                                                 ext_inps--;
00621                                                 break;
00622                                         }
00623                                  }
00624                                  out_port = out_port->next;
00625                          }
00626                  }
00627          }
00628          port = port->next;
00629  }
00630 
00631  assert(ext_inps >= 0);
00632  
00633  return (ext_inps);
00634 }
00635 
00636 
00637 /*****************************************/
00638 /** Checks that nets used as clock inputs to latches are never also used 
00639  * as VPACK_LUT inputs.  It's electrically questionable, and more importantly 
00640  * would break the clustering code.                                     
00641  */
00642 static void check_clocks (boolean *is_clock) {
00643 
00644  int inet, iblk, ipin;
00645  t_model_ports *port;
00646 
00647  for (iblk=0;iblk<num_logical_blocks;iblk++) {
00648     if (logical_block[iblk].type != VPACK_OUTPAD) {
00649                 port = logical_block[iblk].model->inputs;
00650                 while(port) {
00651                    for (ipin = 0; ipin < port->size; ipin++) {
00652                            inet = logical_block[iblk].input_nets[port->index][ipin]; 
00653                           if (inet != OPEN) {
00654                                  if (is_clock[inet]) {
00655                                         printf("Error in check_clocks.  Net %d (%s) is a clock, but "
00656                                                    "also\n"
00657                                                    "\tconnects to a logic block input on logical_block %d (%s).\n", inet, 
00658                                                    vpack_net[inet].name, iblk, logical_block[iblk].name);
00659                                         printf("This would break the current clustering "
00660                                                    "implementation and is electrically\n"
00661                                                    "\tquestionable, so clustering has been aborted.\n");
00662                                         exit (1);
00663                                  }
00664                           }
00665                    }
00666                    port = port->next;
00667                 }
00668     }
00669  }
00670 }
00671 
00672 static int get_max_cluster_size(t_pb_type *pb_type) {
00673         int i, j;
00674         int max_size, temp_size;
00675         if (pb_type->modes == 0) {
00676                 max_size = pb_type->num_pb;
00677         } else {
00678                 max_size = 0;
00679                 for(i = 0; i < pb_type->num_modes; i++) {
00680                         temp_size = 0;
00681                         for(j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
00682                                 temp_size += pb_type->modes[i].pb_type_children[j].num_pb * 
00683                                         get_max_cluster_size(&pb_type->modes[i].pb_type_children[j]);
00684                         }
00685                         if(temp_size > max_size) {
00686                                 max_size = temp_size;
00687                         }
00688                 }
00689         }
00690         return max_size;
00691 }
00692 
00693 
00694 static int get_max_primitive_inputs(t_pb_type *pb_type) {
00695         int i, j;
00696         int max_size, temp_size;
00697         if (pb_type->blif_model != NULL) {
00698                 max_size = pb_type->num_input_pins;
00699         } else {
00700                 max_size = 0;
00701                 for(i = 0; i < pb_type->num_modes; i++) {
00702                         temp_size = 0;
00703                         for(j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
00704                                 temp_size = get_max_primitive_inputs(&pb_type->modes[i].pb_type_children[j]);
00705                                 if(temp_size > max_size) {
00706                                         max_size = temp_size;
00707                                 }
00708                         }
00709                 }
00710         }
00711         return max_size;
00712 }
00713 
00714 static int get_max_pb_depth(t_pb_type *pb_type) {
00715         int i, j;
00716         int max_depth, temp_depth;
00717         max_depth = pb_type->depth;
00718         for(i = 0; i < pb_type->num_modes; i++) {
00719                 for(j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
00720                         temp_depth = get_max_pb_depth(&pb_type->modes[i].pb_type_children[j]);
00721                         if(temp_depth > max_depth) {
00722                                 max_depth = temp_depth;
00723                         }
00724                 }
00725         }
00726         return max_depth;
00727 }
00728 
00729 /** Determine if logical block is in pb */
00730 static boolean is_logical_blk_in_pb(int iblk, t_pb *pb) {
00731         t_pb * cur_pb;
00732         cur_pb = logical_block[iblk].pb;
00733         while(cur_pb) {
00734                 if(cur_pb == pb) {
00735                         return TRUE;
00736                 }
00737                 cur_pb = cur_pb->parent_pb;
00738         }
00739         return FALSE;
00740 }
00741 
00742 /** Add blk to list of feasible blocks sorted according to gain */
00743 static void add_blk_to_pb_stats_candidates(int iblk, float *gain, t_pb *pb) {
00744         int i, j;
00745         i = 0;
00746         if(i == pb->pb_stats.num_marked_models) {
00747                 /* Corresponding model not found, add it */
00748                 pb->pb_stats.num_marked_models++;
00749                 pb->pb_stats.num_feasible_blocks[i] = 1;
00750                 pb->pb_stats.feasible_blocks[i][0] = iblk;
00751         } else {
00752                 /* found matching model, bubble sort */
00753                 if(pb->pb_stats.num_feasible_blocks[i] >= AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE - 1) {
00754                         /* maximum size for array, remove smallest gain element and sort */
00755                         if(gain[iblk] > gain[pb->pb_stats.feasible_blocks[i][0]]) {
00756                                 /* single loop insertion sort */
00757                                 for(j = 0; j < pb->pb_stats.num_feasible_blocks[i] - 1; j++) {
00758                                         if(gain[iblk] <= gain[pb->pb_stats.feasible_blocks[i][j + 1]]) {
00759                                                 pb->pb_stats.feasible_blocks[i][j] = iblk;
00760                                                 break;
00761                                         } else {
00762                                                 pb->pb_stats.feasible_blocks[i][j] = pb->pb_stats.feasible_blocks[i][j + 1];
00763                                         }
00764                                 }
00765                                 if(j == pb->pb_stats.num_feasible_blocks[i] - 1) {
00766                                         pb->pb_stats.feasible_blocks[i][j] = iblk;
00767                                 }
00768                         }
00769                 } else {
00770                         /* Expand array and single loop insertion sort */
00771                         for(j = pb->pb_stats.num_feasible_blocks[i] - 1; j >= 0; j--) {
00772                                 if(gain[pb->pb_stats.feasible_blocks[i][j]] > gain[iblk]) {
00773                                         pb->pb_stats.feasible_blocks[i][j + 1] = pb->pb_stats.feasible_blocks[i][j];
00774                                 } else {
00775                                         pb->pb_stats.feasible_blocks[i][j + 1] = iblk;
00776                                         break;
00777                                 }
00778                         }
00779                         if(j < 0) {
00780                                 pb->pb_stats.feasible_blocks[i][0] = iblk;
00781                         }
00782                         pb->pb_stats.num_feasible_blocks[i]++;
00783                 }
00784         }
00785 }
00786 
00787 
00788 
00789 /*****************************************/
00790 /** Allocates the main data structures used for clustering and properly 
00791  * initializes them.                                                   
00792  */
00793 static void alloc_and_init_clustering (boolean global_clocks, 
00794                                                                            float alpha, float beta, 
00795                                                                            int max_cluster_size, 
00796                                                                            int max_primitive_inputs, 
00797                                                                            int max_pb_depth, 
00798                                                                            int max_models) {
00799 
00800  int i, ext_inps, ipin, driving_blk, inet;
00801  struct s_ilink *next_ptr;
00802  
00803  alloc_and_load_cluster_legality_checker();
00804 
00805  for (i=0;i<num_logical_blocks;i++) {
00806         logical_block[i].clb_index = NO_CLUSTER;                        
00807  }
00808 
00809  unclustered_list_head = (struct s_ilink *) my_calloc(max_primitive_inputs + 1, sizeof(struct s_ilink));
00810  unclustered_list_head_size = max_primitive_inputs + 1;
00811 
00812  for (i = 0; i <= max_primitive_inputs; i++) {
00813     unclustered_list_head[i].next = NULL;
00814  }
00815  
00816  
00817  memory_pool = (struct s_ilink *) my_malloc (num_logical_blocks * sizeof (struct s_ilink));
00818  next_ptr = memory_pool;
00819  
00820  for (i=0;i<num_logical_blocks;i++) {
00821    ext_inps = num_ext_inputs(i);
00822    next_ptr->next = unclustered_list_head[ext_inps].next;
00823    unclustered_list_head[ext_inps].next = next_ptr;
00824    next_ptr->iblk = i;
00825    next_ptr++;
00826  }
00827 
00828 
00829  net_output_feeds_driving_block_input = (int *) my_malloc (
00830                  num_logical_nets * sizeof (int));
00831 
00832         for (inet=0;inet<num_logical_nets;inet++) {
00833                 net_output_feeds_driving_block_input[inet] = 0;
00834                 driving_blk = vpack_net[inet].node_block[0];
00835                 for (ipin=1;ipin<=vpack_net[inet].num_sinks;ipin++) {
00836                         if (vpack_net[inet].node_block[ipin] == driving_blk) {
00837                                 net_output_feeds_driving_block_input[inet]++;
00838                         }
00839                 }
00840         }
00841 }
00842 
00843 
00844 /*****************************************/
00845 static void free_pb_stats_recursive (t_pb *pb, int max_models) {
00846 
00847         int i, j;
00848         /* Releases all the memory used by clustering data structures.   */
00849         if(pb) {
00850                 if(pb->pb_graph_node != NULL) {
00851                         if(pb->pb_graph_node->pb_type->num_modes != 0) {
00852                                 for(i = 0; i < pb->pb_graph_node->pb_type->modes[pb->mode].num_pb_type_children; i++) {
00853                                         for(j = 0; j < pb->pb_graph_node->pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) {
00854                                                 if(pb->child_pbs && pb->child_pbs[i]) {
00855                                                         free_pb_stats_recursive (&pb->child_pbs[i][j], max_models);
00856                                                 }
00857                                         }
00858                                 }
00859                         }
00860                 }
00861                 free_pb_stats(pb->pb_stats, max_models);
00862         }
00863 }
00864 
00865 
00866 /*****************************************/
00867 /** Checks if logical_block iblk could be adding to the open cluster without 
00868  * violating clocking constraints.  Returns TRUE if it's OK.  Some  
00869  * parameters are unused since this function needs the same inter-  
00870  * face as the other feasibility checkers.                          
00871  */
00872 static boolean outputs_clocks_and_models_feasible (enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb) {
00873 
00874  int inet, depth, clocks_avail;
00875  t_model_ports *port;
00876  int ipin, output_net, outputs_avail;
00877  boolean clocks_feasible, outputs_feasible;
00878  t_pb * temp_pb;
00879 
00880  /* Assumptions: 1. Clock network unique, can only connect to clock network */
00881  /*              2. Logic block output can't internally connect to clocks. */
00882  clocks_feasible = outputs_feasible = TRUE;
00883  temp_pb = cur_pb;
00884  while(temp_pb && clocks_feasible && outputs_feasible) {
00885         depth = temp_pb->pb_graph_node->pb_type->depth;
00886 
00887         clocks_avail = cur_pb->pb_stats.clocks_avail;
00888         if(clocks_avail == NOT_VALID) {
00889                 clocks_avail = temp_pb->pb_graph_node->pb_type->num_clock_pins;
00890         }
00891 
00892          inet = logical_block[iblk].clock_net;
00893          if (inet != OPEN) {
00894                  if (temp_pb->pb_stats.num_pins_of_net_in_pb[inet] == 0) {
00895                    clocks_avail--;
00896                 }   
00897                 else if (temp_pb->pb_stats.num_pins_of_net_in_pb[inet] == 1 &&
00898                    temp_pb->pb_stats.net_output_in_pb[inet]) {
00899                    clocks_avail--;
00900                 }   
00901          }
00902 
00903          
00904          outputs_avail = temp_pb->pb_stats.outputs_avail;
00905 
00906          port = logical_block[iblk].model->outputs;
00907          while(port) {
00908                 /* Outputs that connect to internal blocks free up an input pin. */
00909                  for(ipin = 0; ipin < port->size; ipin++) {
00910                          output_net = logical_block[iblk].output_nets[port->index][ipin];
00911                          if (output_net != OPEN) {
00912                                  if(temp_pb->pb_stats.num_pins_of_net_in_pb[output_net] >= vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net]) {
00913                                          if((temp_pb->pb_stats.num_pins_of_net_in_pb[output_net] != vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net])) {
00914                                                  printf("[INTERNAL ERROR] net %d %s %d != %d\n", output_net, vpack_net[output_net].name,
00915                                                          temp_pb->pb_stats.num_pins_of_net_in_pb[output_net], vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net]);
00916                                          }
00917                                          assert(temp_pb->pb_stats.num_pins_of_net_in_pb[output_net] == vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net]);
00918                                  } else {
00919                                          outputs_avail--;
00920                                  }
00921                          }
00922                  }
00923                  port = port->next;
00924          }
00925 
00926          port = logical_block[iblk].model->inputs;
00927          while(port) {
00928                  if(port->is_clock == TRUE) {
00929                          port = port->next;
00930                          continue;
00931                  }
00932                  for (ipin = 0; ipin < port->size; ipin++) {
00933                         inet = logical_block[iblk].input_nets[port->index][ipin];
00934                         if(inet != OPEN) {
00935                                 if(temp_pb->pb_stats.net_output_in_pb[inet] && 
00936                                         temp_pb->pb_stats.num_pins_of_net_in_pb[inet] + net_output_feeds_driving_block_input[inet] == vpack_net[inet].num_sinks) {
00937                                         outputs_avail++;
00938                                 }
00939                         }
00940                  }
00941                 port = port->next;
00942          }
00943 
00944          clocks_feasible = (clocks_avail >= 0); 
00945          outputs_feasible = (outputs_avail >= 0);
00946 
00947          temp_pb = temp_pb->parent_pb;
00948  }
00949 
00950  if(models_feasible(packer_algorithm, iblk, cur_pb->pb_graph_node->pb_type, cur_pb, cur_pb->mode)) {
00951          if (clocks_feasible && outputs_feasible) 
00952                 return (TRUE);
00953          else
00954                 return (FALSE);
00955  } else {
00956          return FALSE;
00957  } 
00958 }
00959 
00960 
00961 static boolean models_feasible(enum e_packer_algorithm packer_algorithm, int iblk, const t_pb_type *cur_pb_type, t_pb *cur_pb, int mode) {
00962         struct s_linked_vptr *cur_model;
00963         int i, j, k, cur_ptr;
00964         struct s_ilink *ptr;
00965         boolean feasible;
00966         const t_pb_type *child_pb_type;
00967         
00968         cur_model = cur_pb_type->models_contained;
00969 
00970 
00971         assert(cur_pb == NULL || cur_pb->pb_graph_node->pb_type == cur_pb_type);
00972         assert(cur_pb == NULL || cur_pb->mode == mode);
00973 
00974         feasible = FALSE;
00975         while(cur_model) {
00976                 if(cur_model->data_vptr == logical_block[iblk].model) {
00977                         feasible = TRUE;
00978                         break;
00979                 }
00980                 cur_model = cur_model->next;
00981         }
00982 
00983         if(!feasible) {
00984                 return FALSE;
00985         }
00986         if(packer_algorithm == PACK_BRUTE_FORCE) {
00987                 /* let the brute force packer determine if an empty slot exists */
00988                 return TRUE;
00989         }
00990 
00991         feasible = FALSE;
00992 
00993         if(cur_pb_type->num_modes == 0) {
00994                 if(hack_force_safe_latch) {
00995                         /* TODO: Either remove hack or actually make it a feature properly
00996                         
00997                            hack to make the LUT get packed before the LATCH gets packed for cases that the LATCH can be absorbed
00998                            for fracturable LUT architectures
00999                         */
01000                         if(strcmp(logical_block[iblk].model->name, "latch") == 0) {
01001                                 i = logical_block[iblk].input_nets[0][0];
01002                                 j = vpack_net[i].node_block[0];
01003                                 if(logical_block[j].clb_index == NO_CLUSTER && vpack_net[i].num_sinks == 1 && 
01004                                         strcmp(logical_block[j].model->name, "names") == 0) {
01005                                         cur_ptr = logical_block[j].model->inputs[0].size;
01006                                         ptr = unclustered_list_head[cur_ptr].next;
01007                                         while(ptr == NULL && cur_ptr > 0) {
01008                                                 cur_ptr--;
01009                                                 ptr = unclustered_list_head[cur_ptr].next;
01010                                         }
01011                                         if (cur_ptr > 2)
01012                                                 return FALSE;
01013                                 }
01014                         }
01015                 }
01016 
01017                 return primitive_type_feasible(iblk, cur_pb_type, NULL, OPEN);
01018         }
01019 
01020         for(i = 0; i < cur_pb_type->modes[mode].num_pb_type_children; i++) {
01021                 child_pb_type = &cur_pb_type->modes[mode].pb_type_children[i];
01022                 if(cur_pb) {
01023                         for(k = 0; k < child_pb_type->num_pb && cur_pb->child_pbs && cur_pb->child_pbs[i]; k++) {
01024                                 if(cur_pb->child_pbs[i][k].name == NULL) {
01025                                         break;
01026                                 }
01027                         }
01028                         if(k == child_pb_type->num_pb) { 
01029                                 /* no free child */
01030                                 continue;
01031                         }
01032                 }
01033                 if(child_pb_type->num_modes == 0) {
01034                         feasible = primitive_type_feasible(iblk, &cur_pb_type->modes[mode].pb_type_children[i], NULL, OPEN);
01035                         if(feasible) {
01036                                 return TRUE;
01037                         }
01038                 } else {
01039                         for(j = 0; j < cur_pb_type->modes[mode].pb_type_children[i].num_modes; j++) {
01040                                 feasible = models_feasible(packer_algorithm, iblk, &cur_pb_type->modes[mode].pb_type_children[i], NULL, j);
01041                                 if(feasible) {
01042                                         return TRUE;
01043                                 }
01044                         }
01045                 }
01046         }
01047 
01048         return FALSE;
01049 }
01050 
01051 static boolean primitive_feasible(int iblk, t_pb *cur_pb) {
01052         const t_pb_type *cur_pb_type;
01053         int i;
01054         t_pb *memory_class_pb; /* Used for memory class only, for memories, open pins must be the same among siblings */
01055         int sibling_memory_blk;
01056         
01057         cur_pb_type = cur_pb->pb_graph_node->pb_type;
01058         memory_class_pb = NULL;
01059         sibling_memory_blk = OPEN;
01060 
01061         assert(cur_pb_type->num_modes == 0); /* primitive */
01062         if(cur_pb->logical_block != OPEN) {
01063                 /* This pb already has a logical block */
01064                 return FALSE;
01065         }
01066 
01067         if(cur_pb_type->class_type == MEMORY_CLASS) {
01068                 /* memory class is special, all siblings must share all nets, including open nets, with the exception of data nets */
01069                 /* find sibling if one exists */
01070                 memory_class_pb = cur_pb->parent_pb;
01071                 for(i = 0; i < cur_pb_type->parent_mode->num_pb_type_children; i++) {
01072                         if(memory_class_pb->child_pbs[cur_pb->mode][i].name != NULL && memory_class_pb->child_pbs[cur_pb->mode][i].logical_block != OPEN) {
01073                                 sibling_memory_blk = memory_class_pb->child_pbs[cur_pb->mode][i].logical_block;
01074                         }
01075                 }
01076                 if(sibling_memory_blk == OPEN) {
01077                         memory_class_pb = NULL;
01078                 }
01079         }
01080 
01081         return primitive_type_feasible(iblk, cur_pb_type, memory_class_pb, sibling_memory_blk);
01082 }
01083 
01084 static boolean primitive_type_feasible(int iblk, const t_pb_type *cur_pb_type, t_pb *memory_class_pb, int sibling_memory_blk) {
01085         t_model_ports *port;
01086         int i, j;
01087         boolean second_pass;
01088 
01089         /* check if ports are big enough */
01090         /* for memories, also check that pins are the same with existing siblings */
01091         port = logical_block[iblk].model->inputs;
01092         second_pass = FALSE;
01093         while(port || !second_pass) {
01094                 /* TODO: This is slow if the number of ports are large, fix if becomes a problem */
01095                 if(!port) {
01096                         second_pass = TRUE;
01097                         port = logical_block[iblk].model->outputs;
01098                 }
01099                 for(i = 0; i < cur_pb_type->num_ports; i++) {
01100                         if(cur_pb_type->ports[i].model_port == port) {
01101                                 /* TODO: This is slow, I only need to check from 0 if it is a memory block, other blocks I can check from port->size onwards */
01102                                 for(j = 0; j < port->size; j++) {
01103                                         if(port->dir == IN_PORT && !port->is_clock) {
01104                                                 if(memory_class_pb) {
01105                                                         if(strstr(cur_pb_type->ports[i].port_class, "data") != cur_pb_type->ports[i].port_class) {
01106                                                                 if(logical_block[iblk].input_nets[port->index][j] != logical_block[sibling_memory_blk].input_nets[port->index][j]) {
01107                                                                         return FALSE;
01108                                                                 }
01109                                                         }
01110                                                 }
01111                                                 if(logical_block[iblk].input_nets[port->index][j] != OPEN && j >= cur_pb_type->ports[i].num_pins) {
01112                                                         return FALSE;
01113                                                 }
01114                                         } else if(port->dir == OUT_PORT) {
01115                                                 if(memory_class_pb) {
01116                                                         if(strstr(cur_pb_type->ports[i].port_class, "data") != cur_pb_type->ports[i].port_class) {
01117                                                                 if(logical_block[iblk].output_nets[port->index][j] != logical_block[sibling_memory_blk].output_nets[port->index][j]) {
01118                                                                         return FALSE;
01119                                                                 }
01120                                                         }
01121                                                 }
01122                                                 if(logical_block[iblk].output_nets[port->index][j] != OPEN && j >= cur_pb_type->ports[i].num_pins) {
01123                                                         return FALSE;
01124                                                 }
01125                                         } else {
01126                                                 assert(port->dir == IN_PORT && port->is_clock);
01127                                                 assert(j == 0);
01128                                                 if(memory_class_pb) {
01129                                                         if(logical_block[iblk].clock_net != logical_block[sibling_memory_blk].clock_net) {
01130                                                                 return FALSE;
01131                                                         }
01132                                                 }
01133                                                 if(logical_block[iblk].clock_net != OPEN && j >= cur_pb_type->ports[i].num_pins) {
01134                                                         return FALSE;
01135                                                 }
01136                                         }
01137                                 }
01138                                 break;
01139                         }
01140                 }
01141                 if(i == cur_pb_type->num_ports) {
01142                         if((logical_block[iblk].model->inputs != NULL && !second_pass) ||
01143                                 logical_block[iblk].model->outputs != NULL && second_pass) {
01144                                 /* physical port not found */
01145                                 return FALSE;
01146                         }
01147                 }
01148                 if(port) {
01149                         port = port->next;
01150                 }
01151         }
01152         return TRUE;
01153 }
01154 
01155 /*****************************************/
01156 /** This routine returns a logical_block which has not been clustered, has  
01157  * no connection to the current cluster, satisfies the cluster     
01158  * clock constraints, is a valid subblock inside the cluster, does not exceed the cluster subblock units available,
01159  *      and has ext_inps external inputs.  If        
01160  * there is no such logical_block it returns NO_CLUSTER.  Remove_flag      
01161  * controls whether or not blocks that have already been clustered 
01162  * are removed from the unclustered_list data structures.  NB:     
01163  * to get a logical_block regardless of clock constraints just set clocks_avail > 0.                                                      
01164  */
01165 static int get_logical_block_by_num_ext_inputs (            INP enum e_packer_algorithm packer_algorithm,
01166                                                                                                                 INOUTP t_pb *cur_pb, 
01167                                                                                                                 INP int ext_inps, 
01168                                                                                                                 INP enum e_removal_policy remove_flag) {
01169 
01170  struct s_ilink *ptr, *prev_ptr;
01171  int ilogical_blk;
01172  
01173  prev_ptr = &unclustered_list_head[ext_inps];
01174  ptr = unclustered_list_head[ext_inps].next;
01175  
01176  while (ptr != NULL) {
01177          ilogical_blk = ptr->iblk;
01178          /* TODO: Get better candidate logical block in future, eg. return most timing critical or some other smarter metric */
01179          if (logical_block[ilogical_blk].clb_index == NO_CLUSTER) {
01180                 if (outputs_clocks_and_models_feasible(packer_algorithm, ilogical_blk, NULL, cur_pb)) {
01181                         return ilogical_blk;
01182                 }
01183 
01184        prev_ptr = ptr;
01185 
01186     }
01187 
01188     else if (remove_flag == REMOVE_CLUSTERED) {
01189        prev_ptr->next = ptr->next;
01190     }
01191 
01192     ptr = ptr->next;
01193  }
01194 
01195  return NO_CLUSTER;
01196 }
01197 
01198 
01199 /*****************************************/
01200 /** This routine is used to find new blocks for clustering when there are no feasible       
01201  * blocks with any attraction to the current cluster (i.e. it finds       
01202  * blocks which are unconnected from the current cluster).  It returns    
01203  * the logical_block with the largest number of used inputs that satisfies the    
01204  * clocking and number of inputs constraints.  If no suitable logical_block is    
01205  * found, the routine returns NO_CLUSTER.                                 
01206  */
01207 static int get_free_logical_block_with_most_ext_inputs_for_cluster (INP enum e_packer_algorithm packer_algorithm,
01208                                                                                                                                         INOUTP t_pb *cur_pb) {
01209  int ext_inps;
01210  int best_block;
01211 
01212  int inputs_avail;
01213 
01214  inputs_avail = cur_pb->pb_stats.inputs_avail;
01215  if(inputs_avail == NOT_VALID) {
01216          inputs_avail = cur_pb->pb_graph_node->pb_type->num_input_pins;
01217  }
01218 
01219  best_block = NO_CLUSTER;
01220 
01221  if(inputs_avail >= unclustered_list_head_size) {
01222          inputs_avail = unclustered_list_head_size - 1;
01223  }
01224  
01225  for (ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) {
01226     best_block = get_logical_block_by_num_ext_inputs (packer_algorithm, cur_pb, ext_inps, REMOVE_CLUSTERED);
01227         if (best_block != NO_CLUSTER) {
01228        break;
01229         }
01230  }
01231  return best_block;
01232 }
01233 
01234 /*****************************************/
01235 /** This routine is used to find the first seed logical_block for the clustering.  It returns    
01236  * the logical_block with the largest number of used inputs that satisfies the    
01237  * clocking and number of inputs constraints.  If no suitable logical_block is    
01238  * found, the routine returns NO_CLUSTER.                                 
01239  */
01240 static int get_seed_logical_block_with_most_ext_inputs (int max_primitive_inputs) {
01241 
01242  int iblk, ext_inps;
01243  struct s_ilink *ptr, *prev_ptr;
01244 
01245  iblk = NO_CLUSTER;
01246  
01247  for (ext_inps=max_primitive_inputs;ext_inps>=0;ext_inps--) {
01248         prev_ptr = &unclustered_list_head[ext_inps];
01249         ptr = unclustered_list_head[ext_inps].next;
01250         
01251         while (ptr != NULL) {
01252                 iblk = ptr->iblk;
01253                         
01254                 if (logical_block[iblk].clb_index == NO_CLUSTER) {
01255                         prev_ptr->next = ptr->next; /* remove blk from list */
01256                         break;
01257                 } else {
01258                         iblk = NO_CLUSTER;
01259                 }
01260 
01261                 prev_ptr = ptr;
01262                 ptr = ptr->next;
01263         }
01264 
01265     if (iblk != NO_CLUSTER) 
01266        return (iblk);
01267  }
01268 
01269  return (NO_CLUSTER);   /* No suitable logical_block found. */
01270 }
01271 
01272 
01273 /*****************************************/
01274 /* Call this routine when starting to fill up a new cluster.  It resets 
01275  * the gain vector, etc.                                                
01276  */
01277 static void alloc_and_load_pb_stats (t_pb *pb, int max_models, int max_cluster_size, int max_primitive_inputs) {
01278 
01279  int j;
01280 
01281 /* If statement below is for speed.  If nets are reasonably low-fanout,  *
01282  * only a relatively small number of blocks will be marked, and updating *
01283  * only those logical_block structures will be fastest.  If almost all blocks    *
01284  * have been touched it should be faster to just run through them all    *
01285  * in order (less addressing and better cache locality).                 */
01286 
01287 
01288         pb->pb_stats.gain = (float *) my_malloc (num_logical_blocks * sizeof (float));
01289         pb->pb_stats.lengthgain=(float*) my_malloc (num_logical_blocks * sizeof (float));
01290         pb->pb_stats.sharinggain=(float*) my_malloc (num_logical_blocks * sizeof (float));
01291         pb->pb_stats.hillgain =(float*) my_malloc (num_logical_blocks * sizeof (float));
01292         pb->pb_stats.connectiongain = (float*) my_malloc (num_logical_blocks * sizeof (float));
01293         pb->pb_stats.inputs_avail = NOT_VALID;
01294         pb->pb_stats.outputs_avail = NOT_VALID;
01295         pb->pb_stats.clocks_avail = NOT_VALID;
01296         pb->pb_stats.num_marked_models = 0;
01297         pb->pb_stats.cur_marked_model = NOT_VALID;
01298         pb->pb_stats.num_feasible_blocks = my_calloc(max_models, sizeof(int));
01299         pb->pb_stats.feasible_blocks = my_malloc(max_models * sizeof(int*));
01300 
01301          
01302         for (j=0;j<max_models;j++) {
01303                 pb->pb_stats.feasible_blocks[j] = (int*) my_malloc (AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE * sizeof (int));
01304         }
01305 
01306         for (j=0;j<num_logical_blocks;j++) {
01307                 pb->pb_stats.gain[j] = 0.0;
01308                 pb->pb_stats.lengthgain[j]= 0.0;
01309                 pb->pb_stats.connectiongain[j] = 0;
01310                 pb->pb_stats.sharinggain[j]=NOT_VALID;
01311                 pb->pb_stats.hillgain[j]=NOT_VALID;
01312         }
01313 
01314         pb->pb_stats.num_pins_of_net_in_pb = (int *) my_malloc (num_logical_nets * sizeof(int));
01315         pb->pb_stats.net_output_in_pb = (boolean *) my_malloc (num_logical_nets * sizeof(boolean));
01316 
01317         for (j=0;j<num_logical_nets;j++) {
01318                 pb->pb_stats.num_pins_of_net_in_pb[j] = 0;
01319                 pb->pb_stats.net_output_in_pb[j] = FALSE;
01320         }
01321          
01322         pb->pb_stats.marked_nets = (int *) my_malloc ((max_primitive_inputs + 2) * max_cluster_size * sizeof(int));
01323         pb->pb_stats.marked_blocks = (int *) my_malloc (num_logical_blocks * sizeof(int));
01324 
01325         pb->pb_stats.num_marked_nets = 0;
01326         pb->pb_stats.num_marked_blocks = 0;
01327 
01328 }
01329 /*****************************************/
01330 
01331 /** Allocate space within pb and load data for it given the current logical block. 
01332  *      Note: pb space and parent info must be loaded before calling this function
01333  *
01334  * TODO: add-in some heuristic for determing which pb to select if there are multiple choices
01335  */
01336 static enum e_block_pack_status alloc_and_load_pb(INP enum e_packer_algorithm packer_algorithm, INP t_pb_graph_node *pb_graph_node, INP int iblock, 
01337                                                           INOUTP t_pb * pb, INP int max_models, int max_cluster_size, int max_primitive_inputs) {
01338         int i, j, k;
01339         const t_pb_type *pb_type;
01340 
01341         boolean is_primitive;
01342         enum e_block_pack_status block_pack_status;
01343 
01344         pb->pb_graph_node = pb_graph_node;
01345         pb->logical_block = NO_CLUSTER;
01346 
01347         block_pack_status = BLK_STATUS_UNDEFINED;
01348 
01349         if(iblock == NO_CLUSTER) {
01350                 return BLK_FAILED_FEASIBLE;
01351         }
01352 
01353         pb_type = pb_graph_node->pb_type;
01354                 
01355         is_primitive = (pb_type->num_modes == 0);
01356 
01357         /* TODO: It would be good to save and prune away nodes of types that I know won't fit, that way, I don't have to traverse the tree
01358         for types that don't work, this runtime optimization can be done later */
01359         if(is_primitive) { 
01360                 if(!primitive_feasible(iblock, pb)) {
01361                         return BLK_FAILED_FEASIBLE;
01362                 }
01363                 assert(pb->logical_block == OPEN);
01364                 /* check if I can add and route this block, if they yes, then success, if not, fail */
01365                 if(hack_frac_lut_no_legality) {
01366                         block_pack_status = BLK_PASSED;
01367                         pb->logical_block = iblock;
01368                         logical_block[iblock].pb = pb;
01369                 } else {
01370                         block_pack_status = try_add_block_to_current_cluster_and_primitive(iblock, pb);
01371                 }
01372                 if(block_pack_status == BLK_PASSED) {
01373                         pb->name = my_strdup(logical_block[iblock].name);
01374                 } 
01375         } else {
01376                 if(pb->child_pbs == NULL) {
01377                         /* first time here so must allocate space and determine mode of operation */
01378                         for(i = 0; i < pb_type->num_modes && block_pack_status != BLK_PASSED; i++) {
01379                                 pb->mode = i;
01380                                 if(!models_feasible(packer_algorithm, iblock, pb_type, pb, pb->mode)) {
01381                                         continue;
01382                                 }
01383                                 set_pb_mode(pb, 0, 0); /* default mode is to use mode 1 */
01384                                 set_pb_mode(pb, i, 1);
01385                                 pb->child_pbs = my_calloc(pb_type->modes[i].num_pb_type_children, sizeof(t_pb *));
01386 
01387                                 for(j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
01388                                         /* allocate space for children if first time allocating space */
01389                                         pb->child_pbs[j] = my_calloc(pb_type->modes[i].pb_type_children[j].num_pb, sizeof(t_pb));
01390                                         for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb; k++) {
01391                                                 pb->child_pbs[j][k].parent_pb = pb;
01392                                                 alloc_and_load_pb_stats(&pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs);
01393                                         }
01394                                 }
01395                                 for(j = 0; j < pb_type->modes[i].num_pb_type_children && block_pack_status != BLK_PASSED; j++) {
01396                                         for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb && block_pack_status != BLK_PASSED; k++) {
01397                                                 block_pack_status = alloc_and_load_pb(packer_algorithm, &pb_graph_node->child_pb_graph_nodes[i][j][k], iblock, &pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs);
01398                                                 if(block_pack_status == BLK_FAILED_FEASIBLE) {
01399                                                         /* sibling blocks differ only in interconnect, 
01400                                                           if feasibility fails then it is failing on something other than interconnect
01401                                                           therefore, no point trying the same thing for siblings */
01402                                                         break;
01403                                                 }
01404                                         }
01405                                 }
01406                                 if(block_pack_status == BLK_PASSED) {
01407                                         pb->name = my_strdup(logical_block[iblock].name);
01408                                 } else {
01409                                         set_pb_mode(pb, i, 0); /* default mode is to use mode 1 */
01410                                         set_pb_mode(pb, 0, 1);
01411                                         for(j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
01412                                                 if(pb->child_pbs[j] != NULL) {
01413                                                         for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb; k++) {
01414                                                                 free_pb_stats(pb->child_pbs[j][k].pb_stats, max_models);
01415                                                         }
01416                                                         free(pb->child_pbs[j]);
01417                                                 }
01418                                         }
01419                                         free(pb->child_pbs);
01420                                         pb->child_pbs = NULL;
01421                                 }
01422                         }
01423                 } else {
01424                         /* 
01425                          * Another block has been allocated inside so must select placement carefully 
01426                          */
01427                         i = pb->mode;
01428                         if(!models_feasible(packer_algorithm, iblock, pb_type, pb, pb->mode)) {
01429                                 block_pack_status = BLK_FAILED_FEASIBLE;
01430                         } else {
01431                                 for(j = 0; j < pb_type->modes[i].num_pb_type_children && block_pack_status != BLK_PASSED; j++) {
01432                                         for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb && block_pack_status != BLK_PASSED; k++) {
01433                                                 /* Simple heuristic: assume that any packed cluster is well-packed, try to pack into new child only */
01434                                                 /* Brute force heuristic: traverse whole tree always */
01435                                                 if(pb->child_pbs[j][k].name == NULL) {
01436                                                         block_pack_status = alloc_and_load_pb(packer_algorithm, &pb_graph_node->child_pb_graph_nodes[i][j][k], iblock, &pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs);
01437                                                 } else if (packer_algorithm == PACK_BRUTE_FORCE && pb_type->modes[i].pb_type_children[j].num_modes != 0) {
01438                                                         block_pack_status = alloc_and_load_pb(packer_algorithm, &pb_graph_node->child_pb_graph_nodes[i][j][k], iblock, &pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs);
01439                                                 }
01440                                                 if(block_pack_status == BLK_FAILED_FEASIBLE && packer_algorithm != PACK_BRUTE_FORCE) {
01441                                                         /* Children blocks differ only in routing, don't continue if feasibility fails 
01442                                                            Brute force algorithm may have just ran into a node that was full, therefore it should not break out
01443                                                         */
01444                                                         break;
01445                                                 }
01446                                         }
01447                                 }
01448                         }
01449                 }
01450         }
01451 
01452         return block_pack_status;
01453 }
01454 
01455 /** This function is called when the connectiongain values on the vpack_net
01456  * inet require updating.   */
01457 static void update_connection_gain_values (int inet, int clustered_block, t_pb *cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block) {
01458 
01459   int iblk, ipin;
01460   int clb_index;
01461   int num_internal_connections, num_open_connections, num_stuck_connections;
01462 
01463   num_internal_connections = num_open_connections = num_stuck_connections = 0;
01464 
01465   clb_index = logical_block[clustered_block].clb_index;
01466 
01467   /* may wish to speed things up by ignoring clock nets since they are high fanout */
01468   
01469   for (ipin=0;ipin<=vpack_net[inet].num_sinks;ipin++) {
01470         iblk = vpack_net[inet].node_block[ipin];
01471         if (logical_block[iblk].clb_index == clb_index && is_logical_blk_in_pb(iblk, logical_block[clustered_block].pb)) {
01472                 num_internal_connections++;
01473         } else if (logical_block[iblk].clb_index == OPEN) {
01474                 num_open_connections++;
01475         } else {
01476                 num_stuck_connections++;
01477         }
01478   }
01479 
01480   if (net_relation_to_clustered_block==OUTPUT){
01481     for (ipin=1;ipin<=vpack_net[inet].num_sinks;ipin++) {
01482                 iblk = vpack_net[inet].node_block[ipin];
01483                 if (logical_block[iblk].clb_index == NO_CLUSTER) {
01484                         /* Only works if net has one connection to block, TODO: handle case where net has multi-connection to block */
01485                         if(num_internal_connections > 1) {
01486                                 cur_pb->pb_stats.connectiongain[iblk] -= 1 /(float)(vpack_net[inet].num_sinks - (num_internal_connections - 1) + 1 + 1 * num_stuck_connections);
01487                         }
01488                         cur_pb->pb_stats.connectiongain[iblk] += 1 /(float)(vpack_net[inet].num_sinks - num_internal_connections + 1 + 1 * num_stuck_connections);
01489                 }
01490     }
01491   }
01492 
01493   if(net_relation_to_clustered_block==INPUT){ 
01494     /*Calculate the connectiongain for the logical_block which is driving *
01495      *the vpack_net that is an input to a logical_block in the cluster */
01496 
01497           iblk=vpack_net[inet].node_block[0];
01498     if (logical_block[iblk].clb_index == NO_CLUSTER) {
01499                 if(num_internal_connections > 1) {
01500                         cur_pb->pb_stats.connectiongain[iblk] -= 1 / (float)(vpack_net[inet].num_sinks - (num_internal_connections - 1) + 1 + 1 * num_stuck_connections);
01501                 }
01502                 cur_pb->pb_stats.connectiongain[iblk] += 1 / (float)(vpack_net[inet].num_sinks - num_internal_connections + 1 + 1 * num_stuck_connections);
01503     }
01504   }
01505 }
01506 /*****************************************/
01507 /** This function is called when the length_gain values on the vpack_net
01508  * inet requires updating.   
01509  */
01510 static void update_length_gain_values (int inet, int clustered_block, t_pb *cur_pb,
01511                                            enum  e_net_relation_to_clustered_block net_relation_to_clustered_block) {
01512 
01513   float lengain;
01514   int newblk, ifirst;
01515   int iblk, ipin;
01516 
01517 /* Check if this vpack_net lists its driving logical_block twice.  If so, avoid  *
01518  * double counting this logical_block by skipping the first (driving) pin. */
01519    if (net_output_feeds_driving_block_input[inet] == FALSE) 
01520     ifirst = 0;
01521   else
01522     ifirst = 1;
01523 
01524   if (net_relation_to_clustered_block == OUTPUT && !vpack_net[inet].is_global){
01525           for (ipin=ifirst; ipin <= vpack_net[inet].num_sinks; ipin++) {
01526                 iblk = vpack_net[inet].node_block[ipin];
01527                 if (logical_block[iblk].clb_index == NO_CLUSTER) {
01528                         lengain=net_pin_backward_criticality[inet][ipin];
01529                         if (lengain>cur_pb->pb_stats.lengthgain[iblk])
01530                                 cur_pb->pb_stats.lengthgain[iblk]=lengain;
01531                 }
01532           }
01533   }
01534 
01535   if(net_relation_to_clustered_block == INPUT && !vpack_net[inet].is_global){ 
01536     /*Calculate the length gain for the logical_block which is driving *
01537      *the vpack_net that is an input to a logical_block in the cluster */
01538           newblk = vpack_net[inet].node_block[0];               
01539           if (logical_block[newblk].clb_index == NO_CLUSTER) {
01540                   for (ipin=1; ipin <= vpack_net[inet].num_sinks; ipin++) {
01541                         lengain=net_pin_forward_criticality[inet][ipin];
01542                         if (lengain>cur_pb->pb_stats.lengthgain[newblk])
01543                                 cur_pb->pb_stats.lengthgain[newblk]=lengain;
01544                         
01545                   }
01546           }
01547   }
01548 }
01549 /*****************************************/
01550 /** Updates the marked data structures, and if gain_flag is GAIN,  
01551  * the gain when a logic logical_block is added to a cluster.  The        
01552  * sharinggain is the number of inputs that a logical_block shares with   
01553  * blocks that are already in the cluster. Hillgain is the        
01554  * reduction in number of pins-required by adding a logical_block to the  
01555  * cluster. The lengthgain is the criticality of the most critical
01556  * vpack_net between this logical_block and a logical_block in the cluster.             
01557  */
01558 static void mark_and_update_partial_gain (int inet, 
01559        enum e_gain_update gain_flag, int clustered_block, int port_on_clustered_block, int pin_on_clustered_block,
01560        boolean timing_driven, boolean connection_driven, enum e_net_relation_to_clustered_block net_relation_to_clustered_block) {
01561 
01562 
01563  int iblk, ipin, ifirst;
01564  t_pb *cur_pb;
01565 
01566  cur_pb = logical_block[clustered_block].pb->parent_pb;
01567 
01568  while(cur_pb) {
01569          /* Mark vpack_net as being visited, if necessary. */
01570 
01571          if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 0) {
01572                 cur_pb->pb_stats.marked_nets[cur_pb->pb_stats.num_marked_nets] = inet;
01573                 cur_pb->pb_stats.num_marked_nets++;
01574          }
01575 
01576         /* Update gains of affected blocks. */
01577 
01578          if (gain_flag == GAIN) {
01579 
01580         /* Check if this vpack_net lists its driving logical_block twice.  If so, avoid  *
01581          * double counting this logical_block by skipping the first (driving) pin. */
01582           
01583                 if (net_output_feeds_driving_block_input[inet] == 0) 
01584                    ifirst = 0;
01585                 else
01586                    ifirst = 1;
01587                
01588            if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet]==0) {
01589                    for (ipin=ifirst;ipin<=vpack_net[inet].num_sinks;ipin++) {
01590                            iblk = vpack_net[inet].node_block[ipin];
01591                            if (logical_block[iblk].clb_index == NO_CLUSTER) {
01592 
01593                  if (cur_pb->pb_stats.sharinggain[iblk] == NOT_VALID) {
01594                    cur_pb->pb_stats.marked_blocks[cur_pb->pb_stats.num_marked_blocks] = iblk;
01595                    cur_pb->pb_stats.num_marked_blocks++;
01596                    cur_pb->pb_stats.sharinggain[iblk]=1;
01597                    cur_pb->pb_stats.hillgain[iblk] = 1 - num_ext_inputs (iblk);
01598                  }
01599                  else {
01600                    cur_pb->pb_stats.sharinggain[iblk]++;
01601                    cur_pb->pb_stats.hillgain[iblk]++;
01602                  }
01603                    }
01604                  }
01605            }
01606 
01607            if (connection_driven) {
01608                    update_connection_gain_values(inet, clustered_block, cur_pb, net_relation_to_clustered_block);                  
01609            }
01610            
01611            if (timing_driven) {
01612                  update_length_gain_values(inet, clustered_block, cur_pb, net_relation_to_clustered_block);
01613            }           
01614          }
01615             
01616          cur_pb->pb_stats.num_pins_of_net_in_pb[inet]++;
01617          cur_pb = cur_pb->parent_pb;
01618  }
01619 }
01620 
01621 /*****************************************/
01622 /** Updates the total  gain array to reflect the desired tradeoff between
01623  * input sharing (sharinggain) and path_length minimization (lengthgain)
01624  */
01625 static void update_total_gain(float alpha, float beta, boolean timing_driven, boolean 
01626                               connection_driven, boolean global_clocks, t_pb *pb){
01627 
01628   int i, iblk, j, k;
01629   t_pb * cur_pb;
01630   int num_input_pins, num_output_pins;
01631   int num_used_input_pins, num_used_output_pins;
01632   t_model_ports *port;
01633 
01634   cur_pb = pb;
01635   while(cur_pb) {
01636 
01637           for (i=0;i<cur_pb->pb_stats.num_marked_blocks;i++) {
01638                   iblk=cur_pb->pb_stats.marked_blocks[i];
01639 
01640                   /* Todo: This was used to explore different normalization options, can be made more efficient once we decide on which one to use*/
01641                   num_input_pins = 0;
01642                   port = logical_block[iblk].model->inputs;
01643                   j =0;
01644                   num_used_input_pins = 0;
01645                   while(port) {
01646                            num_input_pins += port->size;
01647                            if(!port->is_clock) {
01648                                    for(k = 0; k < port->size; k++) {
01649                                            if(logical_block[iblk].input_nets[j][k] != OPEN) {
01650                                                         num_used_input_pins++;
01651                                            }
01652                                    }
01653                                    j++;
01654                            }
01655                            port = port->next;
01656                    }
01657                   if(num_input_pins == 0) {
01658                            num_input_pins = 1;
01659                   }
01660                   
01661                    num_used_output_pins = 0;
01662                    j = 0;
01663                    num_output_pins = 0;
01664                    port = logical_block[iblk].model->outputs;
01665                    while(port) {
01666                            num_output_pins += port->size;
01667                                 for(k = 0; k < port->size; k++) {
01668                                    if(logical_block[iblk].output_nets[j][k] != OPEN) {
01669                                                 num_used_output_pins++;
01670                                    }
01671                                 }
01672                            port = port->next;
01673                            j++;
01674                    }
01675                    /* end todo */
01676                           
01677               /* Calculate area-only cost function */
01678                   if (connection_driven) {
01679                         /*try to absorb as many connections as possible*/
01680                            /* TODO: change back to used pins when done testing */
01681                            cur_pb->pb_stats.gain[iblk] = ((1-beta)*(float)cur_pb->pb_stats.sharinggain[iblk] + beta*(float)cur_pb->pb_stats.connectiongain[iblk])/(num_input_pins + num_output_pins);
01682                            /* cur_pb->pb_stats.gain[iblk] = ((1-beta)*(float)cur_pb->pb_stats.sharinggain[iblk] + beta*(float)cur_pb->pb_stats.connectiongain[iblk])/(num_used_input_pins + num_used_output_pins); */
01683                   } else {
01684                           /*cur_pb->pb_stats.gain[iblk] = ((float)cur_pb->pb_stats.sharinggain[iblk])/(num_used_input_pins + num_used_output_pins); */
01685                           cur_pb->pb_stats.gain[iblk] = ((float)cur_pb->pb_stats.sharinggain[iblk])/(num_input_pins + num_output_pins);
01686                   }
01687 
01688                   /* Add in timing driven cost into cost function */
01689                   if (timing_driven) {
01690                           cur_pb->pb_stats.gain[iblk] = alpha*cur_pb->pb_stats.lengthgain[iblk] + (1.0-alpha)*(float)cur_pb->pb_stats.gain[iblk];
01691                   }
01692           }
01693           cur_pb = cur_pb->parent_pb;
01694   }
01695 }
01696 
01697 /*****************************************/
01698 /** Updates cluster stats such as gain, used pins, and clock structures.  */
01699 static void update_cluster_stats (      INP int new_blk,
01700                                                                         INP int clb_index,
01701                                                                         INP boolean *is_clock, 
01702                                                                         INP boolean global_clocks, 
01703                                                                         INP float alpha, 
01704                                                                         INP float beta,
01705                                                                         INP boolean timing_driven, 
01706                                                                         INP boolean connection_driven) {
01707 
01708 
01709  int i, ipin, inet, depth;
01710  t_model_ports *port;
01711  t_pb *cur_pb;
01712 
01713  /* TODO: what a scary comment from Vaughn, we'll have to watch out for this causing problems */
01714 /* Output can be open so the check is necessary.  I don't change  *
01715  * the gain for clock outputs when clocks are globally distributed  *
01716  * because I assume there is no real need to pack similarly clocked *
01717  * FFs together then.  Note that by updating the gain when the      *
01718  * clock driver is placed in a cluster implies that the output of   *
01719  * LUTs can be connected to clock inputs internally.  Probably not  *
01720  * true, but it doesn't make much difference, since it will still   *
01721  * make local routing of this clock very short, and none of my      *
01722  * benchmarks actually generate local clocks (all come from pads).  */
01723 
01724 
01725  logical_block[new_blk].clb_index = clb_index;
01726 
01727  cur_pb = logical_block[new_blk].pb->parent_pb;
01728  while(cur_pb) {
01729         depth = cur_pb->pb_graph_node->pb_type->depth;
01730         /* reset list of feasible blocks */
01731         for (i=0;i<cur_pb->pb_stats.num_marked_models;i++){ 
01732                 cur_pb->pb_stats.num_feasible_blocks[i] = 0;
01733         }
01734         cur_pb->pb_stats.num_marked_models = 0;
01735         cur_pb->pb_stats.cur_marked_model = NOT_VALID;
01736         
01737         /* determine valid pins */
01738         if (cur_pb->pb_stats.inputs_avail == NOT_VALID)
01739                 cur_pb->pb_stats.inputs_avail = cur_pb->pb_graph_node->pb_type->num_input_pins;
01740 
01741         if (cur_pb->pb_stats.outputs_avail == NOT_VALID)
01742                 cur_pb->pb_stats.outputs_avail = cur_pb->pb_graph_node->pb_type->num_output_pins;
01743 
01744         if (cur_pb->pb_stats.clocks_avail == NOT_VALID)
01745                 cur_pb->pb_stats.clocks_avail = cur_pb->pb_graph_node->pb_type->num_clock_pins;
01746 
01747         cur_pb = cur_pb->parent_pb;
01748  }  
01749 
01750  port = logical_block[new_blk].model->outputs;
01751  while(port) {
01752          for(ipin = 0; ipin < port->size; ipin++) {
01753                  inet = logical_block[new_blk].output_nets[port->index][ipin];    /* Output pin first. */
01754                  if (inet != OPEN) {
01755                         if (!is_clock[inet] || !global_clocks) 
01756                                 mark_and_update_partial_gain (inet, GAIN, new_blk, port->index, ipin, 
01757                                                         timing_driven, connection_driven, OUTPUT);
01758                         else
01759                                 mark_and_update_partial_gain (inet, NO_GAIN, new_blk, port->index, ipin, 
01760                                                         timing_driven, connection_driven, OUTPUT);
01761 
01762                         cur_pb = logical_block[new_blk].pb->parent_pb;
01763                         while(cur_pb) {
01764                                 depth = cur_pb->pb_graph_node->pb_type->depth;
01765                                 cur_pb->pb_stats.net_output_in_pb[inet] = TRUE;
01766 
01767                                 if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] > 1 && !is_clock[inet])
01768                                         cur_pb->pb_stats.inputs_avail++;
01769 
01770                                 if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] + 
01771                                         net_output_feeds_driving_block_input[inet] != vpack_net[inet].num_sinks + 1) {
01772                                                 cur_pb->pb_stats.outputs_avail--;
01773                                 } else if (hack_frac_lut_no_legality) {
01774                                         if(depth > 1)
01775                                                 cur_pb->pb_stats.outputs_avail--; /* frac lut cannot absorb pins after the top two levels  */
01776                                 }
01777 
01778                                 cur_pb = cur_pb->parent_pb;
01779                         }
01780                  }
01781          }
01782          port = port->next;
01783  }
01784  port = logical_block[new_blk].model->inputs;
01785  while(port) {
01786          if(port->is_clock) {
01787                  port = port->next;
01788                  continue;
01789          }
01790          for (ipin=0; ipin<port->size; ipin++) {   /*  VPACK_BLOCK input pins. */
01791 
01792                  inet = logical_block[new_blk].input_nets[port->index][ipin];
01793                 if (inet != OPEN) {
01794                    mark_and_update_partial_gain (inet, GAIN, new_blk, port->index, ipin, timing_driven, connection_driven, INPUT);
01795                    cur_pb = logical_block[new_blk].pb->parent_pb;
01796                    while(cur_pb) {
01797                                 depth = cur_pb->pb_graph_node->pb_type->depth;
01798                                 
01799                                 if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 1) 
01800                                         cur_pb->pb_stats.inputs_avail--;
01801 
01802                                 assert(cur_pb->pb_stats.inputs_avail >= 0);
01803                                 if (cur_pb->pb_stats.net_output_in_pb[inet] &&
01804                                         cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == vpack_net[inet].num_sinks + 1) {
01805                                                 cur_pb->pb_stats.outputs_avail++;
01806                                                 if (hack_frac_lut_no_legality && depth > 1)
01807                                                         cur_pb->pb_stats.outputs_avail--; /* frac lut cannot absorb pins after the top two levels */
01808                                 }
01809 
01810                                 cur_pb = cur_pb->parent_pb;
01811                         }
01812                 }
01813          }
01814          port = port->next;
01815  }
01816 
01817 
01818 /* Note:  The code below ONLY WORKS when nets that go to clock inputs *
01819  * NEVER go to the input of a VPACK_COMB.  It doesn't really make electrical *
01820  * sense for that to happen, and I check this in the check_clocks     *
01821  * function.  Don't disable that sanity check.                        */
01822  inet = logical_block[new_blk].clock_net;    /* Clock input pin. */
01823  if (inet != OPEN) {
01824          if (global_clocks) 
01825        mark_and_update_partial_gain (inet, NO_GAIN, new_blk, 0, 
01826                                      0, timing_driven,
01827                                      connection_driven, INPUT);
01828     else
01829        mark_and_update_partial_gain (inet, GAIN, new_blk, 0, 
01830                                      0, timing_driven,
01831                                      connection_driven, INPUT);
01832 
01833 
01834         cur_pb = logical_block[new_blk].pb->parent_pb;
01835    while(cur_pb) {
01836                 depth = cur_pb->pb_graph_node->pb_type->depth;
01837                 
01838                 if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 1) {
01839                         cur_pb->pb_stats.clocks_avail--;
01840                 }
01841                 else if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 2 && cur_pb->pb_stats.net_output_in_pb[inet]) {
01842                         cur_pb->pb_stats.clocks_avail--;
01843                 }
01844         /* Note: unlike inputs, I'm currently assuming there is no internal *
01845          * connection in a cluster from VPACK_LUT outputs to clock inputs.        */
01846                 
01847                 cur_pb = cur_pb->parent_pb;
01848         }
01849  }
01850 
01851  update_total_gain(alpha, beta, timing_driven, connection_driven, global_clocks, logical_block[new_blk].pb->parent_pb);
01852 
01853 }
01854 
01855 
01856 /** Given a starting seed block, start_new_cluster determines the next cluster type to use. 
01857  *   It expands the FPGA if it cannot find a legal cluster for the logical block
01858  */
01859 static void start_new_cluster(INP enum e_packer_algorithm packer_algorithm,
01860                                                           INP const t_arch * arch,
01861                                                           INOUTP t_block *new_cluster,
01862                                                             INP int clb_index,
01863                                                                 INP int istart,
01864                                                                 INP float aspect,
01865                                                                 INOUTP int *num_used_instances_type, 
01866                                                                 INOUTP int *num_instances_type,
01867                                                                 INP int num_models,
01868                                                                 INP int max_cluster_size,
01869                                                                 INP int max_primitive_inputs) {
01870  int i, j;
01871  boolean success;
01872 
01873  assert(new_cluster->name == NULL); /* Check if this cluster is really empty */
01874 
01875  /* Allocate a dummy initial cluster and load a logical block as a seed and check if it is legal */
01876  new_cluster->name = my_malloc(strlen(logical_block[istart].name) + 4);
01877  sprintf(new_cluster->name, "cb.%s", logical_block[istart].name);
01878  new_cluster->nets = NULL;
01879  new_cluster->type = NULL;
01880  new_cluster->pb = NULL;
01881  new_cluster->x = UNDEFINED;
01882  new_cluster->y = UNDEFINED;
01883  new_cluster->z = UNDEFINED;
01884 
01885  success = FALSE;
01886 
01887  while(!success) {
01888          for(i = 0; i < num_types; i++) {
01889                 if(num_used_instances_type[i] < num_instances_type[i]) {
01890                         new_cluster->type = &type_descriptors[i];
01891                         if(new_cluster->type == EMPTY_TYPE) {
01892                                 continue;
01893                         }
01894                         new_cluster->pb = my_calloc(1, sizeof(t_pb));
01895                         alloc_and_load_pb_stats(new_cluster->pb, num_models, max_cluster_size, max_primitive_inputs);
01896                         new_cluster->pb->parent_pb = NULL;
01897                         new_cluster->pb->pb_graph_node = new_cluster->type->pb_graph_head;
01898                         
01899                         alloc_and_load_legalizer_for_cluster(new_cluster, clb_index, arch);
01900                         for(j = 0; j < new_cluster->type->pb_graph_head->pb_type->num_modes && !success; j++) {
01901                                 new_cluster->pb->mode = j;
01902                                 if(models_feasible(packer_algorithm, istart, new_cluster->pb->pb_graph_node->pb_type, new_cluster->pb, new_cluster->pb->mode)) {
01903                                         success = (BLK_PASSED == alloc_and_load_pb(packer_algorithm, new_cluster->type->pb_graph_head, istart, new_cluster->pb, num_models, max_cluster_size, max_primitive_inputs)); 
01904                                 }
01905                         }                        
01906                         if(success) {
01907                                 
01908                                 if(hack_frac_lut_no_legality) {
01909                                         logical_block[istart].clb_index = clb_index;
01910                                 }
01911                                 /* TODO: For now, just grab any working cluster, in the future, heuristic needed to grab best complex block based on supply and demand */
01912                                 break;
01913                         } else {
01914                                 free_legalizer_for_cluster(new_cluster);
01915                                 free_pb_stats(new_cluster->pb->pb_stats, num_models);
01916                                 free(new_cluster->pb);
01917                         }                       
01918                 }
01919          }
01920 
01921          /* Expand FPGA size and recalculate number of available cluster types*/
01922          if (!success) {
01923                 if(aspect >= 1.0)
01924                 {
01925                     ny++;
01926                     nx = nint(ny * aspect);
01927                 }
01928             else
01929                 {
01930                     nx++;
01931                     ny = nint(nx / aspect);
01932                 }
01933                 printf("Not enough resources expand FPGA size to x = %d y = %d\n", nx, ny);
01934                 if((nx > MAX_SHORT) || (ny > MAX_SHORT)) {
01935                         printf("Circuit cannot pack into architecture, architecture size (nx = %d, ny = %d) exceeds packer range.\n", nx, ny);
01936                         exit(1);
01937                 }
01938                 alloc_and_load_grid(num_instances_type);
01939                 freeGrid();
01940          }
01941  }
01942  num_used_instances_type[new_cluster->type->index]++;
01943 }
01944 
01945 
01946 /*****************************************/
01947 /** Checks if adding iblk to the currently open cluster would violate 
01948  * the cluster input and clock pin limitations.  Returns TRUE if     
01949  * iblk can be added to the cluster, FALSE otherwise.                
01950  */
01951 static boolean inputs_outputs_models_and_clocks_feasible (INP enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb) {
01952 
01953  int ipin, inet, output_net;
01954  t_model_ports *port;
01955  t_pb* temp_pb;
01956  boolean inputs_feasible = TRUE;
01957  
01958  int inputs_avail;
01959  
01960  temp_pb = cur_pb;
01961  while(temp_pb && inputs_feasible) {
01962          inputs_avail = temp_pb->pb_stats.inputs_avail;
01963          if(inputs_avail == NOT_VALID) {
01964                  inputs_avail = temp_pb->pb_graph_node->pb_type->num_input_pins;
01965          }
01966 
01967          port = logical_block[iblk].model->outputs;
01968          while(port) {
01969                 /* Outputs that connect to internal blocks free up an input pin. */
01970                  for(ipin = 0; ipin < port->size; ipin++) {
01971                          output_net = logical_block[iblk].output_nets[port->index][ipin];
01972                          if (output_net != OPEN && temp_pb->pb_stats.num_pins_of_net_in_pb[output_net] != 0 && !is_clock[output_net])
01973                                 inputs_avail++;
01974                  }
01975                  port = port->next;
01976          }
01977 
01978          port = logical_block[iblk].model->inputs;
01979          while(port) {
01980                 if(!port->is_clock) {
01981                 /* Inputs that do not connect to an output pin of an internal block (including this one) require an input pin. */
01982                  for (ipin = 0; ipin < port->size; ipin++) {
01983                         inet = logical_block[iblk].input_nets[port->index][ipin];
01984                         if (inet != OPEN && temp_pb->pb_stats.num_pins_of_net_in_pb[inet] == 0) {
01985                                 if(net_output_feeds_driving_block_input[inet] == 0) {
01986                                         inputs_avail--;
01987                                 } 
01988                         }
01989                  }
01990                 }
01991                 port = port->next;
01992          }
01993          inputs_feasible = (inputs_avail >= 0);
01994          temp_pb = temp_pb->parent_pb;
01995  }
01996  if (outputs_clocks_and_models_feasible (packer_algorithm, iblk, is_clock, cur_pb) == FALSE) 
01997     return (FALSE); 
01998 
01999  if (inputs_feasible) 
02000     return (TRUE);
02001  else
02002     return (FALSE);
02003 }
02004 
02005 /*****************************************/
02006 /** This routine populates a list of feasible blocks outside the cluster then returns the best one for the list    
02007  * not currently in a cluster and satisfies the feasibility     
02008  * function passed in as is_feasible.  If there are no feasible 
02009  * blocks it returns NO_CLUSTER.                                
02010  */
02011 static int get_highest_gain_logical_block ( INP enum e_packer_algorithm packer_algorithm,
02012                                                                                    INOUTP t_pb *cur_pb,
02013                                                                                                 INP boolean *is_clock, 
02014                                                                                                 INP boolean (*is_feasible) (enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb), 
02015                                                                                                 INP enum e_gain_type gain_mode) {
02016 
02017         int i, j, iblk, index;
02018         int best_hillgain;
02019         float best_gain;
02020 
02021         int best_block;
02022 
02023         best_gain = (float)NOT_VALID + 1.0; 
02024         best_hillgain=NOT_VALID+1;
02025 
02026         best_block = NO_CLUSTER;
02027 
02028         if(cur_pb->pb_stats.cur_marked_model == NOT_VALID) {
02029                 /* Divide into two cases for speed only. */
02030                 /* Typical case:  not too many blocks have been marked. */
02031 
02032                 if (cur_pb->pb_stats.num_marked_blocks < num_logical_blocks / MARKED_FRAC) {
02033                         for (i=0;i<cur_pb->pb_stats.num_marked_blocks;i++) {
02034                                 iblk = cur_pb->pb_stats.marked_blocks[i];
02035                                 if (logical_block[iblk].clb_index == NO_CLUSTER) {
02036                                         if (gain_mode != HILL_CLIMBING){
02037                                                 if (is_feasible (packer_algorithm, iblk, is_clock, cur_pb)) {
02038                                                         add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.gain, cur_pb);
02039                                                         if (cur_pb->pb_stats.gain[iblk] > best_gain) {
02040                                                                 best_gain = cur_pb->pb_stats.gain[iblk];
02041                                                                 best_block = iblk;
02042                                                         }
02043                                                 }                                                       
02044                                         }
02045                                         else{  /*hill climbing*/
02046                                                 if (is_feasible (packer_algorithm, iblk, is_clock, cur_pb)) {
02047                                                         add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.hillgain, cur_pb);
02048                                                         if (cur_pb->pb_stats.hillgain[iblk] > best_hillgain) {
02049                                                                 best_hillgain = cur_pb->pb_stats.hillgain[iblk];
02050                                                                 best_block = iblk;
02051                                                         }
02052                                                 }                                               
02053                                         }
02054                                 }
02055                         }
02056                 }
02057                 else {        /* Some high fanout nets marked lots of blocks. */
02058                         for (iblk=0;iblk<num_logical_blocks;iblk++) {
02059                                 if (logical_block[iblk].clb_index == NO_CLUSTER) {
02060                                         if (gain_mode != HILL_CLIMBING){
02061                                                 if (is_feasible (packer_algorithm, iblk, is_clock, cur_pb)) {
02062                                                         add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.gain, cur_pb);
02063                                                         if (cur_pb->pb_stats.gain[iblk] > best_gain) {
02064                                                                 best_gain = cur_pb->pb_stats.gain[iblk];
02065                                                                 best_block = iblk;
02066                                                         }
02067                                                 }       
02068                                         }
02069                                         else{ /*hill climbing*/
02070                                                 if (is_feasible (packer_algorithm, iblk, is_clock, cur_pb)) {
02071                                                         add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.hillgain, cur_pb);
02072                                                         if (cur_pb->pb_stats.hillgain[iblk] > best_hillgain) {
02073                                                                 best_hillgain = cur_pb->pb_stats.hillgain[iblk];
02074                                                                 best_block = iblk;
02075                                                         }
02076                                                 }       
02077                                         }
02078                                 }
02079                         }
02080                 }
02081 
02082                 for(i = 0; i < cur_pb->pb_stats.num_marked_models; i++) {
02083                         index = cur_pb->pb_stats.feasible_blocks[i][cur_pb->pb_stats.num_feasible_blocks[i] - 1];
02084                         if (gain_mode != HILL_CLIMBING) {
02085                                 if(cur_pb->pb_stats.gain[index] == cur_pb->pb_stats.gain[best_block]) {
02086                                         cur_pb->pb_stats.cur_marked_model = i;
02087                                         break;
02088                                 }
02089                         } else {
02090                                 if(cur_pb->pb_stats.hillgain[index] == cur_pb->pb_stats.hillgain[best_block]) {
02091                                         cur_pb->pb_stats.cur_marked_model = i;
02092                                         break;
02093                                 }
02094                         }
02095                 }
02096                 assert(cur_pb->pb_stats.num_marked_models == 0 || i < cur_pb->pb_stats.num_marked_models);
02097         }
02098 
02099         if(cur_pb->pb_stats.cur_marked_model == NOT_VALID) {
02100                 return NO_CLUSTER;
02101         }
02102 
02103         for(i = 0; i < cur_pb->pb_stats.num_marked_models; i++) {
02104                 for(j = 0; j < cur_pb->pb_stats.num_feasible_blocks[i]; j++) {
02105                         if(cur_pb->pb_stats.num_feasible_blocks[cur_pb->pb_stats.cur_marked_model] != 0) {
02106                                 cur_pb->pb_stats.num_feasible_blocks[cur_pb->pb_stats.cur_marked_model]--;
02107                                 index = cur_pb->pb_stats.num_feasible_blocks[cur_pb->pb_stats.cur_marked_model];
02108                                 iblk = cur_pb->pb_stats.feasible_blocks[cur_pb->pb_stats.cur_marked_model][index];
02109                                 cur_pb->pb_stats.cur_marked_model = (cur_pb->pb_stats.cur_marked_model + 1) % cur_pb->pb_stats.num_marked_models;
02110                                 return iblk;
02111                         }
02112                 }
02113         }
02114 
02115         return NO_CLUSTER;
02116 }
02117 
02118 
02119 /*****************************************/
02120 /** Finds the vpack block with the the greatest gain that satisifies the      
02121  * input, clock and capacity constraints of a cluster that are       
02122  * passed in.  If no suitable vpack block is found it returns NO_CLUSTER.   
02123  */
02124 static int get_logical_block_for_cluster (      INP enum e_packer_algorithm packer_algorithm,
02125                                                                                         INOUTP t_pb *cur_pb,
02126                                                                                         INP boolean *is_clock,
02127                                                                                         INP boolean allow_unrelated_clustering) {
02128 
02129  int best_block;
02130 
02131  /* If cannot pack into primitive, try packing into cluster */
02132 
02133 
02134  best_block = get_highest_gain_logical_block (packer_algorithm, cur_pb, is_clock, inputs_outputs_models_and_clocks_feasible,
02135                                                  NOT_HILL_CLIMBING);
02136 
02137 /* If no blocks have any gain to the current cluster, the code above *
02138  * will not find anything.  However, another logical_block with no inputs in *
02139  * common with the cluster may still be inserted into the cluster.   */
02140 
02141  if (allow_unrelated_clustering)
02142    if (best_block == NO_CLUSTER) 
02143      best_block = get_free_logical_block_with_most_ext_inputs_for_cluster (packer_algorithm, cur_pb);
02144 
02145  return best_block;
02146 }
02147 
02148 /*****************************************/
02149 static void alloc_and_load_cluster_info (INP int num_clb, INOUTP t_block *clb) {
02150 
02151         /* Loads all missing clustering info necessary to complete clustering.  */
02152         int i, j, i_clb, node_index, ipin, iclass;
02153         int inport, outport, clockport;
02154         
02155         const t_pb_type * pb_type;
02156         t_pb *pb;
02157         
02158         for(i_clb = 0; i_clb < num_clb; i_clb++) {
02159                 rr_node = clb[i_clb].pb->rr_graph;
02160                 pb_type = clb[i_clb].pb->pb_graph_node->pb_type;
02161                 pb = clb[i_clb].pb;
02162 
02163                 clb[i_clb].nets = my_malloc(clb[i_clb].type->num_pins * sizeof(int));
02164                 for(i = 0; i < clb[i_clb].type->num_pins; i++) {
02165                         clb[i_clb].nets[i] = OPEN;
02166                 }
02167 
02168                 inport = outport = clockport = 0;       
02169                 ipin = 0;
02170                 /* Assume top-level pb and clb share a one-to-one connection */
02171                 for(i = 0; i < pb_type->num_ports; i++) {
02172                         if(!pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT) {
02173                                 for(j = 0; j < pb_type->ports[i].num_pins; j++) {
02174                                         iclass = clb[i_clb].type->pin_class[ipin];
02175                                         assert(clb[i_clb].type->class_inf[iclass].type == RECEIVER);
02176                                         assert(!clb[i_clb].type->is_global_pin[ipin]);
02177                                         node_index = pb->pb_graph_node->input_pins[inport][j].pin_count_in_cluster;
02178                                         clb[i_clb].nets[ipin] = rr_node[node_index].net_num;
02179                                         ipin++;
02180                                 }
02181                                 inport++;
02182                         } else if(pb_type->ports[i].type == OUT_PORT) {
02183                                 for(j = 0; j < pb_type->ports[i].num_pins; j++) {
02184                                         iclass = clb[i_clb].type->pin_class[ipin];
02185                                         assert(clb[i_clb].type->class_inf[iclass].type == DRIVER);
02186                                         node_index = pb->pb_graph_node->output_pins[outport][j].pin_count_in_cluster;
02187                                         clb[i_clb].nets[ipin] = rr_node[node_index].net_num;
02188                                         ipin++;
02189                                 }
02190                                 outport++;
02191                         } else {
02192                                 assert(pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT);
02193                                 for(j = 0; j < pb_type->ports[i].num_pins; j++) {
02194                                         iclass = clb[i_clb].type->pin_class[ipin];
02195                                         assert(clb[i_clb].type->class_inf[iclass].type == RECEIVER);
02196                                         assert(clb[i_clb].type->is_global_pin[ipin]);
02197                                         node_index = pb->pb_graph_node->clock_pins[clockport][j].pin_count_in_cluster;
02198                                         clb[i_clb].nets[ipin] = rr_node[node_index].net_num;
02199                                         ipin++;
02200                                 }
02201                                 clockport++;
02202                         }
02203                 }
02204         }
02205 }
02206 
02207 /* TODO: Add more error checking, too light */
02208 /*****************************************/
02209 static void check_clustering (int num_clb, t_block *clb, boolean *is_clock) {
02210         int i;
02211         t_pb * cur_pb;
02212         boolean *blocks_checked;
02213 
02214         blocks_checked = my_calloc(num_logical_blocks, sizeof(boolean));
02215 
02216         /* 
02217          * Check that each logical block connects to one primitive and that the primitive links up to the parent clb
02218          */
02219         for(i = 0; i < num_blocks; i++) {
02220                 if(logical_block[i].pb->logical_block != i) {
02221                         printf(ERRTAG "pb %s does not contain logical block %s but logical block %s #%d links to pb\n",
02222                                         logical_block[i].pb->name, logical_block[i].name, logical_block[i].name, i);
02223                         exit(1);
02224                 }
02225                 cur_pb = logical_block[i].pb;
02226                 assert(strcmp(cur_pb->name, logical_block[i].name) == 0);
02227                 while(cur_pb->parent_pb) {
02228                         cur_pb = cur_pb->parent_pb;
02229                         assert(cur_pb->name);
02230                 }
02231                 if(cur_pb != clb[num_clb].pb) {
02232                         printf(ERRTAG "CLB %s does not match CLB contained by pb %s\n",
02233                                 cur_pb->name, logical_block[i].pb->name);
02234                         exit(1);
02235                 }
02236         }
02237 
02238         /* Check that I do not have spurious links in children pbs */
02239         for(i = 0; i < num_clb; i++) {
02240                 check_cluster_logical_blocks(clb[i].pb, blocks_checked);
02241         }
02242 
02243         for(i = 0; i < num_logical_blocks; i++) {
02244                 if(blocks_checked[i] == FALSE) {
02245                         printf(ERRTAG "Logical block %s #%d not found in any cluster\n", logical_block[i].name, i);
02246                         exit(1);
02247                 }
02248         }
02249 
02250         free(blocks_checked);
02251 }
02252 
02253 /* TODO: May want to check that all logical blocks are actually reached (low priority, nice to have) */
02254 static void check_cluster_logical_blocks (t_pb *pb, boolean *blocks_checked) {
02255         int i, j;
02256         const t_pb_type *pb_type;
02257         boolean has_child;
02258 
02259         has_child = FALSE;
02260         pb_type = pb->pb_graph_node->pb_type;
02261         if(pb_type->num_modes == 0) {
02262                 /* primitive */
02263                 if(pb->logical_block != OPEN) {
02264                         if(blocks_checked[pb->logical_block] != FALSE) {
02265                                 printf(ERRTAG "pb %s contains logical block %s #%d but logical block is already contained in another pb\n",
02266                                         pb->name, logical_block[pb->logical_block].name, pb->logical_block);
02267                                 exit(1);
02268                         }
02269                         blocks_checked[pb->logical_block] = TRUE;
02270                         if(pb != logical_block[pb->logical_block].pb) {
02271                                 printf(ERRTAG "pb %s contains logical block %s #%d but logical block does not link to pb\n",
02272                                         pb->name, logical_block[pb->logical_block].name, pb->logical_block);
02273                                 exit(1);
02274                         }
02275                 }
02276         } else {
02277                 /* this is a container pb, all container pbs must contain children */
02278                 for(i = 0; i < pb_type->modes[pb->mode].num_pb_type_children; i++) {
02279                         for(j = 0; j < pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) {
02280                                 if(pb->child_pbs[i] != NULL) {
02281                                         if(pb->child_pbs[i][j].name != NULL) {
02282                                                 has_child = TRUE;
02283                                                 check_cluster_logical_blocks(&pb->child_pbs[i][j], blocks_checked);
02284                                         }
02285                                 }
02286                         }       
02287                 }
02288                 assert(has_child);
02289         }
02290 }
02291 
02292 /** calculate_timing_information must be called before this, or this function
02293  * will return garbage 
02294  */
02295 static int get_most_critical_unclustered_block(void) {
02296   /*indexofcrit is a global variable that is reset to 0 each time a          *
02297    *timing analysis is done (reset in  sort_blocks_by_criticality)           */
02298  
02299   int critidx, blkidx;
02300 
02301   for (critidx=indexofcrit; critidx<num_logical_blocks; critidx++) {
02302     blkidx=critindexarray[critidx];
02303         if (logical_block[blkidx].clb_index == NO_CLUSTER) {
02304       indexofcrit=critidx+1;
02305       return(blkidx);
02306     }
02307   }
02308 
02309   /*if it makes it to here , there are no more blocks available*/
02310   return(NO_CLUSTER);
02311 }
02312 
02313 static void free_cb (t_pb *pb, int max_models) {
02314         const t_pb_type * pb_type;
02315         int i, total_nodes;
02316         
02317         pb_type = pb->pb_graph_node->pb_type;
02318         
02319         total_nodes = pb->pb_graph_node->total_pb_pins + pb_type->num_input_pins + pb_type->num_output_pins + pb_type->num_clock_pins;
02320 
02321         for(i = 0; i < total_nodes; i++) {
02322                 if(pb->rr_graph[i].edges != NULL) {
02323                         free(pb->rr_graph[i].edges);
02324                 }
02325                 if(pb->rr_graph[i].switches != NULL) {
02326                         free(pb->rr_graph[i].switches);
02327                 }
02328         }
02329         free(pb->rr_graph);
02330         free_pb(pb, max_models);
02331 }
02332 
02333 
02334 static void free_pb (t_pb *pb, int max_models) {
02335         const t_pb_type * pb_type;
02336         int i, j, mode;
02337 
02338         pb_type = pb->pb_graph_node->pb_type;
02339         
02340         if(pb_type->blif_model == NULL) {
02341                 mode = pb->mode;
02342                 for(i = 0; i < pb_type->modes[mode].num_pb_type_children && pb->child_pbs != NULL; i++) {
02343                         for(j = 0; j < pb_type->modes[mode].pb_type_children[i].num_pb && pb->child_pbs[i] != NULL; j++) {
02344                                 if(pb->child_pbs[i][j].name != NULL) {
02345                                         free_pb(&pb->child_pbs[i][j], max_models);
02346                                 }
02347                         }
02348                         if(pb->child_pbs[i])
02349                                 free(pb->child_pbs[i]);
02350                 }
02351                 if(pb->child_pbs);
02352                         free(pb->child_pbs);
02353                 free(pb->name);
02354         } else {
02355                 /* Primitive */
02356                 if(pb->name)
02357                         free(pb->name);
02358                 pb->name = NULL;
02359         }
02360 }
02361 
02362 static void free_pb_stats(t_pb_stats pb_stats, int max_models) {
02363         int i;
02364 
02365         if(pb_stats.gain != NULL) {
02366                 free(pb_stats.gain);
02367                 free(pb_stats.lengthgain);
02368                 free(pb_stats.sharinggain);
02369                 free(pb_stats.hillgain);
02370                 free(pb_stats.connectiongain);
02371                 free(pb_stats.num_feasible_blocks);
02372 
02373                 for(i = 0; i < max_models; i ++) {
02374                         free(pb_stats.feasible_blocks[i]);
02375                 }
02376                 free(pb_stats.feasible_blocks);
02377 
02378                 free(pb_stats.num_pins_of_net_in_pb);
02379                 free(pb_stats.net_output_in_pb);
02380                 free(pb_stats.marked_nets);
02381                 free(pb_stats.marked_blocks);   
02382 
02383                 pb_stats.gain = NULL;           
02384         }
02385 }
02386