VPR-6.0

vpr/SRC/route/rr_graph.c

Go to the documentation of this file.
00001 #include <stdio.h>
00002 #include <math.h>
00003 #include <assert.h>
00004 #include <string.h>
00005 #include "util.h"
00006 #include "vpr_types.h"
00007 #include "globals.h"
00008 #include "rr_graph_util.h"
00009 #include "rr_graph.h"
00010 #include "rr_graph2.h"
00011 #include "rr_graph_sbox.h"
00012 #include "check_rr_graph.h"
00013 #include "rr_graph_timing_params.h"
00014 #include "rr_graph_indexed_data.h"
00015 #include "vpr_utils.h"
00016 #include "read_xml_arch_file.h"
00017 
00018 /* #define ENABLE_DUMP */
00019 /* #define MUX_SIZE_DIST_DISPLAY */
00020 
00021 /** mux size statistic data structures */
00022 typedef struct s_mux
00023 {
00024     int size;
00025     struct s_mux *next;
00026 }
00027 t_mux;
00028 
00029 typedef struct s_mux_size_distribution
00030 {
00031     int mux_count;
00032     int max_index;
00033     int *distr;
00034     struct s_mux_size_distribution *next;
00035 }
00036 t_mux_size_distribution;
00037 
00038 /* UDSD Modifications by WMF End */
00039 
00040 
00041 /******************* Variables local to this module. ***********************/
00042 
00043 static struct s_linked_vptr *rr_mem_chunk_list_head = NULL;
00044 
00045 /** Used to free "chunked" memory.  If NULL, no rr_graph exists right now.  */
00046 static int chunk_bytes_avail = 0;
00047 /** Status of current chunk being dished out by calls to my_chunk_malloc.   */
00048 static char *chunk_next_avail_mem = NULL;
00049 
00050 /********************* Subroutines local to this module. *******************/
00051 static int ****alloc_and_load_pin_to_track_map(INP enum e_pin_type pin_type,
00052                                                INP int nodes_per_chan,
00053                                                INP int Fc,
00054                                                INP t_type_ptr Type,
00055                                                INP boolean
00056                                                perturb_switch_pattern,
00057                                                INP enum e_directionality
00058                                                directionality);
00059 
00060 static struct s_ivec ***alloc_and_load_track_to_pin_lookup(INP int
00061                                                            ****pin_to_track_map,
00062                                                            INP int Fc,
00063                                                            INP int height,
00064                                                            INP int num_pins,
00065                                                            INP int
00066                                                            nodes_per_chan);
00067 
00068 static void build_bidir_rr_opins(INP int i,
00069                                  INP int j,
00070                                  INOUTP t_rr_node * rr_node,
00071                                  INP t_ivec *** rr_node_indices,
00072                                  INP int *****opin_to_track_map,
00073                                  INP int *Fc_out,
00074                                  INP boolean * rr_edge_done,
00075                                  INP t_seg_details * seg_details,
00076                                  INP struct s_grid_tile **grid);
00077 
00078 static void build_unidir_rr_opins(INP int i,
00079                                   INP int j,
00080                                   INP struct s_grid_tile **grid,
00081                                   INP int *Fc_out,
00082                                   INP int nodes_per_chan,
00083                                   INP t_seg_details * seg_details,
00084                                   INOUTP int **Fc_xofs,
00085                                   INOUTP int **Fc_yofs,
00086                                   INOUTP t_rr_node * rr_node,
00087                                   INOUTP boolean * rr_edge_done,
00088                                   OUTP boolean * Fc_clipped,
00089                                   INP t_ivec *** rr_node_indices);
00090 
00091 static void alloc_and_load_rr_graph(INP int num_nodes,
00092                                     INP t_rr_node * rr_node,
00093                                     INP int num_seg_types,
00094                                     INP t_seg_details * seg_details,
00095                                     INP boolean * rr_edge_done,
00096                                     INP struct s_ivec ****track_to_ipin_lookup,
00097                                     INP int *****opin_to_track_map,
00098                                     INP struct s_ivec ***switch_block_conn,
00099                                     INP struct s_grid_tile **grid,
00100                                     INP int nx,
00101                                     INP int ny,
00102                                     INP int Fs,
00103                                     INP short *****sblock_pattern,
00104                                     INP int *Fc_out,
00105                                     INP int **Fc_xofs,
00106                                     INP int **Fc_yofs,
00107                                     INP t_ivec *** rr_node_indices,
00108                                     INP int nodes_per_chan,
00109                                     INP enum e_switch_block_type sb_type,
00110                                     INP int delayless_switch,
00111                                     INP enum e_directionality directionality,
00112                                     INP int wire_to_ipin_switch,
00113                                     OUTP boolean * Fc_clipped);
00114 
00115 static void load_uniform_switch_pattern(INP t_type_ptr type,
00116                                         INOUTP int ****tracks_connected_to_pin,
00117                                         INP int num_phys_pins,
00118                                         INP int *pin_num_ordering,
00119                                         INP int *side_ordering,
00120                                         INP int *offset_ordering,
00121                                         INP int nodes_per_chan,
00122                                         INP int Fc,
00123                                         INP enum e_directionality
00124                                         directionality);
00125 
00126 static void load_perturbed_switch_pattern(INP t_type_ptr type,
00127                                           INOUTP int
00128                                           ****tracks_connected_to_pin,
00129                                           INP int num_phys_pins,
00130                                           INP int *pin_num_ordering,
00131                                           INP int *side_ordering,
00132                                           INP int *offset_ordering,
00133                                           INP int nodes_per_chan,
00134                                           INP int Fc,
00135                                           INP enum e_directionality
00136                                           directionality);
00137 
00138 static void check_all_tracks_reach_pins(t_type_ptr type,
00139                                         int ****tracks_connected_to_pin,
00140                                         int nodes_per_chan,
00141                                         int Fc,
00142                                         enum e_pin_type ipin_or_opin);
00143 
00144 static boolean *alloc_and_load_perturb_ipins(INP int nodes_per_chan,
00145                                              INP int num_types,
00146                                              INP int *Fc_in,
00147                                              INP int *Fc_out,
00148                                              INP enum e_directionality
00149                                              directionality);
00150 
00151 
00152 static void build_rr_sinks_sources(INP int i,
00153                                    INP int j,
00154                                    INP t_rr_node * rr_node,
00155                                    INP t_ivec *** rr_node_indices,
00156                                    INP int delayless_switch,
00157                                    INP struct s_grid_tile **grid);
00158 
00159 static void build_rr_xchan(INP int i,
00160                            INP int j,
00161                            INP struct s_ivec ****track_to_ipin_lookup,
00162                            INP struct s_ivec ***switch_block_conn,
00163                            INP int cost_index_offset,
00164                            INP int nodes_per_chan,
00165                            INP int *opin_mux_size,
00166                            INP short *****sblock_pattern,
00167                            INP int Fs_per_side,
00168                            INP t_seg_details * seg_details,
00169                            INP t_ivec *** rr_node_indices,
00170                            INP boolean * rr_edge_done,
00171                            INOUTP t_rr_node * rr_node,
00172                            INP int wire_to_ipin_switch,
00173                            INP enum e_directionality directionality);
00174 
00175 static void build_rr_ychan(INP int i,
00176                            INP int j,
00177                            INP struct s_ivec ****track_to_ipin_lookup,
00178                            INP struct s_ivec ***switch_block_conn,
00179                            INP int cost_index_offset,
00180                            INP int nodes_per_chan,
00181                            INP int *opin_mux_size,
00182                            INP short *****sblock_pattern,
00183                            INP int Fs_per_side,
00184                            INP t_seg_details * seg_details,
00185                            INP t_ivec *** rr_node_indices,
00186                            INP boolean * rr_edge_done,
00187                            INOUTP t_rr_node * rr_node,
00188                            INP int wire_to_ipin_switch,
00189                            INP enum e_directionality directionality);
00190 
00191 
00192 static void rr_graph_externals(
00193                                t_timing_inf timing_inf,
00194                                t_segment_inf * segment_inf,
00195                                int num_seg_types,
00196                                int nodes_per_chan,
00197                                int wire_to_ipin_switch,
00198                                enum e_base_cost_type base_cost_type);
00199 
00200 void alloc_and_load_edges_and_switches(INP t_rr_node * rr_node,
00201                                        INP int inode,
00202                                        INP int num_edges,
00203                                        INP boolean * rr_edge_done,
00204                                        INP t_linked_edge * edge_list_head);
00205 
00206 static void alloc_net_rr_terminals(void);
00207 
00208 static void alloc_and_load_rr_clb_source(t_ivec *** rr_node_indices);
00209 
00210 #if 0
00211 static void load_uniform_opin_switch_pattern_paired(INP int *Fc_out,
00212                                                     INP int num_pins,
00213                                                     INP int *pins_in_chan_seg,
00214                                                     INP int num_wire_inc_muxes,
00215                                                     INP int num_wire_dec_muxes,
00216                                                     INP int *wire_inc_muxes,
00217                                                     INP int *wire_dec_muxes,
00218                                                     INOUTP t_rr_node * rr_node,
00219                                                     INOUTP boolean *
00220                                                     rr_edge_done,
00221                                                     INP t_seg_details *
00222                                                     seg_details,
00223                                                     OUTP boolean * Fc_clipped);
00224 #endif
00225 void watch_edges(int inode,
00226                  t_linked_edge * edge_list_head);
00227 #if MUX_SIZE_DIST_DISPLAY
00228 static void view_mux_size_distribution(t_ivec *** rr_node_indices,
00229                                        int nodes_per_chan,
00230                                        t_seg_details * seg_details_x,
00231                                        t_seg_details * seg_details_y);
00232 static void print_distribution(FILE * fptr,
00233                                t_mux_size_distribution * distr_struct);
00234 #endif
00235 
00236 static void free_type_pin_to_track_map(int***** ipin_to_track_map, t_type_ptr types, int* fc_in);
00237 
00238 static void free_type_track_to_ipin_map(struct s_ivec**** track_to_pin_map, t_type_ptr types, int nodes_per_chan);
00239 
00240 static t_seg_details *alloc_and_load_global_route_seg_details(INP int
00241                                                               nodes_per_chan,
00242                                                               INP int
00243                                                               global_route_switch);
00244 
00245 /* UDSD Modifications by WMF End */
00246 
00247 static int *alloc_and_load_actual_fc(INP int num_types,
00248                                      INP t_type_ptr types,
00249                                      INP int nodes_per_chan,
00250                                      INP boolean is_Fc_out,
00251                                      INP enum e_directionality directionality,
00252                                      OUTP boolean * Fc_clipped);
00253 
00254 /******************* Subroutine definitions *******************************/
00255 
00256 
00257 void
00258 build_rr_graph(INP t_graph_type graph_type,
00259                INP int num_types,
00260                INP t_type_ptr types,
00261                INP int nx,
00262                INP int ny,
00263                INP struct s_grid_tile **grid,
00264                INP int chan_width,
00265                INP struct s_chan_width_dist *chan_capacity_inf,
00266                INP enum e_switch_block_type sb_type,
00267                INP int Fs,
00268                INP int num_seg_types,
00269                    INP int num_switches,
00270                INP t_segment_inf * segment_inf,
00271                INP int global_route_switch,
00272                INP int delayless_switch,
00273                INP t_timing_inf timing_inf,
00274                INP int wire_to_ipin_switch,
00275                INP enum e_base_cost_type base_cost_type,
00276                OUTP int *Warnings)
00277 {
00278     /* Temp structures used to build graph */
00279     int nodes_per_chan, i, j;
00280     t_seg_details *seg_details = NULL;
00281     int *Fc_in = NULL;
00282     int *Fc_out = NULL;
00283    
00284     int *****opin_to_track_map = NULL;  /* [0..num_types-1][0..num_pins-1][0..height][0..3][0..Fc-1] */
00285     int *****ipin_to_track_map = NULL;  /* [0..num_types-1][0..num_pins-1][0..height][0..3][0..Fc-1] */
00286     t_ivec ****track_to_ipin_lookup = NULL;     /* [0..num_types-1][0..nodes_per_chan-1][0..height][0..3] */
00287     t_ivec ***switch_block_conn = NULL;
00288     short *****unidir_sb_pattern = NULL;
00289     boolean *rr_edge_done = NULL;
00290     boolean is_global_graph;
00291     boolean Fc_clipped;
00292     boolean use_full_seg_groups;
00293     boolean *perturb_ipins = NULL;
00294     enum e_directionality directionality;
00295     int **Fc_xofs = NULL;       /* [0..ny-1][0..nx-1] */
00296     int **Fc_yofs = NULL;       /* [0..nx-1][0..ny-1] */
00297 
00298         rr_node_indices = NULL;
00299         rr_node = NULL;
00300         num_rr_nodes = 0;
00301 
00302     /* Reset warning flag */
00303     *Warnings = RR_GRAPH_NO_WARN;
00304 
00305     /* Decode the graph_type */
00306     is_global_graph = FALSE;
00307     if(GRAPH_GLOBAL == graph_type)
00308         {
00309             is_global_graph = TRUE;
00310         }
00311     use_full_seg_groups = FALSE;
00312     if(GRAPH_UNIDIR_TILEABLE == graph_type)
00313         {
00314             use_full_seg_groups = TRUE;
00315         }
00316     directionality = UNI_DIRECTIONAL;
00317     if(GRAPH_BIDIR == graph_type)
00318         {
00319             directionality = BI_DIRECTIONAL;
00320         }
00321     if(is_global_graph)
00322         {
00323             directionality = BI_DIRECTIONAL;
00324         }
00325 
00326 
00327     /* Global routing uses a single longwire track */
00328     nodes_per_chan = (is_global_graph ? 1 : chan_width);
00329     assert(nodes_per_chan > 0);
00330 
00331 
00332 
00333     /* START SEG_DETAILS */
00334     if(is_global_graph)
00335         {
00336             /* Sets up a single unit length segment type for global routing. */
00337             seg_details =
00338                 alloc_and_load_global_route_seg_details(nodes_per_chan,
00339                                                         global_route_switch);
00340         }
00341     else
00342         {
00343             /* Setup segments including distrubuting tracks and staggering.
00344              * If use_full_seg_groups is specified, nodes_per_chan may be 
00345              * changed. Warning should be singled to caller if this happens. */
00346             seg_details = alloc_and_load_seg_details(&nodes_per_chan,
00347                                                      max(nx, ny),
00348                                                      num_seg_types,
00349                                                      segment_inf,
00350                                                      use_full_seg_groups,
00351                                                      is_global_graph,
00352                                                      directionality);
00353             if((is_global_graph ? 1 : chan_width) != nodes_per_chan)
00354                 {
00355                     *Warnings |= RR_GRAPH_WARN_CHAN_WIDTH_CHANGED;
00356                 }
00357 #ifdef CREATE_ECHO_FILES
00358             dump_seg_details(seg_details, nodes_per_chan, "seg_details.txt");
00359 #endif /* CREATE_ECHO_FILES */
00360         }
00361     /* END SEG_DETAILS */
00362 
00363 
00364 
00365     /* START FC */
00366     /* Determine the actual value of Fc */
00367     if(is_global_graph)
00368         {
00369             Fc_in = (int *)my_malloc(sizeof(int) * num_types);
00370             Fc_out = (int *)my_malloc(sizeof(int) * num_types);
00371             for(i = 0; i < num_types; ++i)
00372                 {
00373                     Fc_in[i] = 1;
00374                     Fc_out[i] = 1;
00375                 }
00376         }
00377     else
00378         {
00379             Fc_clipped = FALSE;
00380             Fc_in =
00381                 alloc_and_load_actual_fc(num_types, types, nodes_per_chan,
00382                                          FALSE, directionality, &Fc_clipped);
00383             if(Fc_clipped)
00384                 {
00385                     *Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
00386                 }
00387             Fc_clipped = FALSE;
00388             Fc_out =
00389                 alloc_and_load_actual_fc(num_types, types, nodes_per_chan,
00390                                          TRUE, directionality, &Fc_clipped);
00391             if(Fc_clipped)
00392                 {
00393                     *Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
00394                 }
00395 
00396 
00397 #ifdef VERBOSE
00398             for(i = 1; i < num_types; ++i)
00399                 {               /* Skip "<EMPTY>" */
00400                     if(type_descriptors[i].is_Fc_out_full_flex)
00401                         {
00402                             printf
00403                                 ("Fc Actual Values: Type = %s, Fc_out = full, Fc_in = %d.\n",
00404                                  type_descriptors[i].name, Fc_input[i]);
00405                         }
00406                     else
00407                         {
00408                             printf
00409                                 ("Fc Actual Values: Type = %s, Fc_out = %d, Fc_in = %d.\n",
00410                                  type_descriptors[i].name, Fc_output[i],
00411                                  Fc_input[i]);
00412                         }
00413                 }
00414 #endif /* VERBOSE */
00415         }
00416         
00417         perturb_ipins =
00418                 alloc_and_load_perturb_ipins(nodes_per_chan, num_types, Fc_in,
00419                                              Fc_out, directionality);
00420     /* END FC */
00421 
00422 
00423     /* Alloc node lookups, count nodes, alloc rr nodes */
00424     num_rr_nodes = 0;
00425     rr_node_indices =
00426         alloc_and_load_rr_node_indices(nodes_per_chan, nx, ny, &num_rr_nodes,
00427                                        seg_details);
00428     rr_node = (t_rr_node *) my_malloc(sizeof(t_rr_node) * num_rr_nodes);
00429     memset(rr_node, 0, sizeof(t_rr_node) * num_rr_nodes);
00430     rr_edge_done = (boolean *) my_malloc(sizeof(boolean) * num_rr_nodes);
00431     memset(rr_edge_done, 0, sizeof(boolean) * num_rr_nodes);
00432 
00433     /* These are data structures used by the the unidir opin mapping. */
00434     if(UNI_DIRECTIONAL == directionality)
00435         {
00436             Fc_xofs = (int **)alloc_matrix(0, ny, 0, nx, sizeof(int));
00437             Fc_yofs = (int **)alloc_matrix(0, nx, 0, ny, sizeof(int));
00438             for(i = 0; i <= nx; ++i)
00439                 {
00440                     for(j = 0; j <= ny; ++j)
00441                         {
00442                             Fc_xofs[j][i] = 0;
00443                             Fc_yofs[i][j] = 0;
00444                         }
00445                 }
00446         }
00447 
00448 
00449 
00450     /* START SB LOOKUP */
00451     /* Alloc and load the switch block lookup */
00452     if(is_global_graph)
00453         {
00454             assert(nodes_per_chan == 1);
00455             switch_block_conn =
00456                 alloc_and_load_switch_block_conn(1, SUBSET, 3);
00457         }
00458     else if(BI_DIRECTIONAL == directionality)
00459         {
00460             switch_block_conn =
00461                 alloc_and_load_switch_block_conn(nodes_per_chan, sb_type, Fs);
00462         }
00463     else
00464         {
00465             assert(UNI_DIRECTIONAL == directionality);
00466 
00467             unidir_sb_pattern =
00468                 alloc_sblock_pattern_lookup(nx, ny, nodes_per_chan);
00469             for(i = 0; i <= nx; i++)
00470                 {
00471                     for(j = 0; j <= ny; j++)
00472                         {
00473                             load_sblock_pattern_lookup(i, j, nodes_per_chan,
00474                                                        seg_details, Fs,
00475                                                        sb_type,
00476                                                        unidir_sb_pattern);
00477                         }
00478                 }
00479         }
00480     /* END SB LOOKUP */
00481 
00482 
00483 
00484     /* START IPINP MAP */
00485     /* Create ipin map lookups */
00486     ipin_to_track_map = (int *****)my_malloc(sizeof(int ****) * num_types);
00487     track_to_ipin_lookup =
00488         (struct s_ivec ****)my_malloc(sizeof(struct s_ivec ***) * num_types);
00489     for(i = 0; i < num_types; ++i)
00490         {
00491             ipin_to_track_map[i] = alloc_and_load_pin_to_track_map(RECEIVER,
00492                                                                    nodes_per_chan,
00493                                                                    Fc_in[i],
00494                                                                    &types[i],
00495                                                                    perturb_ipins
00496                                                                    [i],
00497                                                                    directionality);
00498             track_to_ipin_lookup[i] =
00499                 alloc_and_load_track_to_pin_lookup(ipin_to_track_map[i],
00500                                                    Fc_in[i], types[i].height,
00501                                                    types[i].num_pins,
00502                                                    nodes_per_chan);
00503         }
00504     /* END IPINP MAP */
00505 
00506 
00507 
00508     /* START OPINP MAP */
00509     /* Create opin map lookups */
00510     if(BI_DIRECTIONAL == directionality)
00511         {
00512             opin_to_track_map =
00513                 (int *****)my_malloc(sizeof(int ****) * num_types);
00514             for(i = 0; i < num_types; ++i)
00515                 {
00516                     opin_to_track_map[i] =
00517                         alloc_and_load_pin_to_track_map(DRIVER,
00518                                                         nodes_per_chan,
00519                                                         Fc_out[i], &types[i],
00520                                                         FALSE,
00521                                                         directionality);
00522                 }
00523         }
00524     /* END OPINP MAP */
00525 
00526     /* UDSD Modifications by WMF begin */
00527     /* I'm adding 2 new fields to t_rr_node, and I want them initialized to 0. */
00528     for(i = 0; i < num_rr_nodes; i++)
00529         {
00530             rr_node[i].num_wire_drivers = 0;
00531             rr_node[i].num_opin_drivers = 0;
00532         }
00533 
00534 
00535     alloc_and_load_rr_graph(num_rr_nodes, rr_node, num_seg_types,
00536                             seg_details, rr_edge_done,
00537                             track_to_ipin_lookup, opin_to_track_map,
00538                             switch_block_conn, grid, nx,
00539                             ny, Fs, unidir_sb_pattern, Fc_out, Fc_xofs,
00540                             Fc_yofs, rr_node_indices, nodes_per_chan,
00541                             sb_type, delayless_switch, directionality,
00542                             wire_to_ipin_switch, &Fc_clipped);
00543 
00544 
00545 #ifdef MUX_SIZE_DIST_DISPLAY
00546     if(UNI_DIRECTIONAL == directionality)
00547         {
00548             view_mux_size_distribution(rr_node_indices, nodes_per_chan,
00549                                        seg_details, seg_details);
00550         }
00551 #endif
00552 
00553         /* Update rr_nodes capacities if global routing */
00554         if(graph_type == GLOBAL) {
00555                 for(i = 0; i < num_rr_nodes; i++) {
00556                         if(rr_node[i].type == CHANX || rr_node[i].type == CHANY) {
00557                                 rr_node[i].capacity = chan_width;
00558                         }
00559                 }
00560         }
00561 
00562     rr_graph_externals(timing_inf, segment_inf, num_seg_types,
00563                        nodes_per_chan, wire_to_ipin_switch, base_cost_type);
00564 #ifdef CREATE_ECHO_FILES
00565     dump_rr_graph("rr_graph.echo");
00566 #endif /* CREATE_ECHO_FILES */
00567 
00568     check_rr_graph(graph_type, num_types, types, nx, ny,
00569                    grid, nodes_per_chan, Fs, num_seg_types, num_switches, segment_inf,
00570                    global_route_switch, delayless_switch,
00571                    wire_to_ipin_switch, seg_details, Fc_in, Fc_out,
00572                    rr_node_indices, opin_to_track_map, ipin_to_track_map,
00573                    track_to_ipin_lookup, switch_block_conn, perturb_ipins);
00574 
00575     /* Free all temp structs */
00576     if(seg_details)
00577         {
00578             free_seg_details(seg_details, nodes_per_chan);
00579             seg_details = NULL;
00580         }
00581     if(Fc_in)
00582         {
00583             free(Fc_in);
00584             Fc_in = NULL;
00585         }
00586     if(Fc_out)
00587         {
00588             free(Fc_out);
00589             Fc_out = NULL;
00590         }
00591     if(perturb_ipins)
00592         {
00593             free(perturb_ipins);
00594             perturb_ipins = NULL;
00595         }
00596     if(switch_block_conn)
00597         {
00598             free_switch_block_conn(switch_block_conn, nodes_per_chan);
00599             switch_block_conn = NULL;
00600         }
00601     if(rr_edge_done)
00602         {
00603             free(rr_edge_done);
00604             rr_edge_done = NULL;
00605         }
00606     if(Fc_xofs)
00607         {
00608             free_matrix(Fc_xofs, 0, ny, 0, sizeof(int));
00609                 Fc_xofs = NULL;
00610         }
00611     if(Fc_yofs)
00612         {
00613             free_matrix(Fc_yofs, 0, nx, 0, sizeof(int));
00614                 Fc_yofs = NULL;
00615         }
00616         if(unidir_sb_pattern)
00617         {
00618                 free_sblock_pattern_lookup(unidir_sb_pattern);
00619                 unidir_sb_pattern = NULL;
00620         }
00621         if(opin_to_track_map) {
00622             for(i = 0; i < num_types; ++i)
00623                 {
00624                     free_matrix4(opin_to_track_map[i], 0, types[i].num_pins - 1,
00625                       0, types[i].height - 1, 0, 3, 0, sizeof(int));
00626                 }
00627                 free(opin_to_track_map);
00628         }
00629         
00630     free_type_pin_to_track_map(ipin_to_track_map, types, Fc_in);
00631     free_type_track_to_ipin_map(track_to_ipin_lookup, types, nodes_per_chan);
00632 }
00633 
00634 
00635 static void
00636 rr_graph_externals( t_timing_inf timing_inf,
00637                    t_segment_inf * segment_inf,
00638                    int num_seg_types,
00639                    int nodes_per_chan,
00640                    int wire_to_ipin_switch,
00641                    enum e_base_cost_type base_cost_type)
00642 {
00643     add_rr_graph_C_from_switches(timing_inf.C_ipin_cblock);
00644     alloc_and_load_rr_indexed_data(segment_inf, num_seg_types,
00645                                    rr_node_indices, nodes_per_chan,
00646                                    wire_to_ipin_switch, base_cost_type);
00647 
00648     alloc_net_rr_terminals();
00649     load_net_rr_terminals(rr_node_indices);
00650     alloc_and_load_rr_clb_source(rr_node_indices);
00651 }
00652 
00653 
00654 static boolean *
00655 alloc_and_load_perturb_ipins(INP int nodes_per_chan,
00656                              INP int num_types,
00657                              INP int *Fc_in,
00658                              INP int *Fc_out,
00659                              INP enum e_directionality directionality)
00660 {
00661     int i;
00662     float Fc_ratio;
00663     boolean *result = NULL;
00664 
00665     result = (boolean *) my_malloc(num_types * sizeof(boolean));
00666 
00667     if(BI_DIRECTIONAL == directionality)
00668         {
00669             for(i = 0; i < num_types; ++i)
00670                 {
00671                     result[i] = FALSE;
00672 
00673                     if(Fc_in[i] > Fc_out[i])
00674                         {
00675                             Fc_ratio = (float)Fc_in[i] / (float)Fc_out[i];
00676                         }
00677                     else
00678                         {
00679                             Fc_ratio = (float)Fc_out[i] / (float)Fc_in[i];
00680                         }
00681 
00682                     if((Fc_in[i] <= nodes_per_chan - 2) &&
00683                        (fabs(Fc_ratio - nint(Fc_ratio)) <
00684                         (0.5 / (float)nodes_per_chan)))
00685                         {
00686                             result[i] = TRUE;
00687                         }
00688                 }
00689         }
00690     else
00691         {
00692             /* Unidirectional routing uses mux balancing patterns and 
00693              * thus shouldn't need perturbation. */
00694             assert(UNI_DIRECTIONAL == directionality);
00695             for(i = 0; i < num_types; ++i)
00696                 {
00697                     result[i] = FALSE;
00698                 }
00699         }
00700 
00701     return result;
00702 }
00703 
00704 
00705 static t_seg_details *
00706 alloc_and_load_global_route_seg_details(INP int nodes_per_chan,
00707                                         INP int global_route_switch)
00708 {
00709     t_seg_details *result = NULL;
00710 
00711     assert(nodes_per_chan == 1);
00712     result = (t_seg_details *) my_malloc(sizeof(t_seg_details));
00713 
00714     result->index = 0;
00715     result->length = 1;
00716     result->wire_switch = global_route_switch;
00717     result->opin_switch = global_route_switch;
00718     result->longline = FALSE;
00719     result->direction = BI_DIRECTION;
00720     result->Cmetal = 0.0;
00721     result->Rmetal = 0.0;
00722     result->start = 1;
00723     result->drivers = MULTI_BUFFERED;
00724     result->cb = (boolean *) my_malloc(sizeof(boolean) * 1);
00725     result->cb[0] = TRUE;
00726     result->sb = (boolean *) my_malloc(sizeof(boolean) * 2);
00727     result->sb[0] = TRUE;
00728     result->sb[1] = TRUE;
00729         result->group_size = 1;
00730         result->group_start = 0;
00731 
00732     return result;
00733 }
00734 
00735 
00736 
00737 
00738 /** Calculates the actual Fc values for the given nodes_per_chan value */
00739 static int *
00740 alloc_and_load_actual_fc(INP int num_types,
00741                          INP t_type_ptr types,
00742                          INP int nodes_per_chan,
00743                          INP boolean is_Fc_out,
00744                          INP enum e_directionality directionality,
00745                          OUTP boolean * Fc_clipped)
00746 {
00747 
00748     int i;
00749     int *Result = NULL;
00750     int fac, num_sets;
00751     float Fc;
00752 
00753     *Fc_clipped = FALSE;
00754 
00755     /* Unidir tracks formed in pairs, otherwise no effect. */
00756     fac = 1;
00757     if(UNI_DIRECTIONAL == directionality)
00758         {
00759             fac = 2;
00760         }
00761 
00762     assert((nodes_per_chan % fac) == 0);
00763     num_sets = nodes_per_chan / fac;
00764 
00765     Result = (int *)my_malloc(sizeof(int) * num_types);
00766 
00767     for(i = 0; i < num_types; ++i)
00768         {
00769             Fc = (is_Fc_out ? type_descriptors[i].
00770                   Fc_out : type_descriptors[i].Fc_in);
00771 
00772             if(type_descriptors[i].is_Fc_frac)
00773                 {
00774                     Result[i] = fac * nint(num_sets * Fc);
00775                 }
00776             else
00777                 {
00778                     Result[i] = Fc;
00779                 }
00780 
00781             if(is_Fc_out && type_descriptors[i].is_Fc_out_full_flex)
00782                 {
00783                     Result[i] = nodes_per_chan;
00784                 }
00785 
00786             Result[i] = max(Result[i], fac);
00787             if(Result[i] > nodes_per_chan)
00788                 {
00789                     *Fc_clipped = TRUE;
00790                     Result[i] = nodes_per_chan;
00791                 }
00792 
00793             assert(Result[i] % fac == 0);
00794         }
00795 
00796     return Result;
00797 }
00798 
00799 /** frees the track to ipin mapping for each physical grid type */
00800 static void
00801 free_type_track_to_ipin_map(struct s_ivec**** track_to_pin_map, t_type_ptr types, int nodes_per_chan)
00802 {
00803         int i, itrack, ioff, iside;
00804         for(i = 0; i < num_types; i++) {
00805                 if(track_to_pin_map[i] != NULL) {
00806                         for(itrack = 0; itrack < nodes_per_chan; itrack++)
00807                         {
00808                                 for(ioff = 0; ioff < types[i].height; ioff++)
00809                                 {
00810                                         for(iside = 0; iside < 4; iside++)
00811                                         {
00812                                                 if(track_to_pin_map[i][itrack][ioff][iside].list != NULL) {
00813                                                         free(track_to_pin_map[i][itrack][ioff][iside].list);
00814                                                 }
00815                                         }
00816                                 }
00817                         }
00818                         free_matrix3(track_to_pin_map[i], 0, nodes_per_chan - 1, 0,
00819                                                          types[i].height - 1, 0,
00820                                                          sizeof(struct s_ivec));
00821                 }
00822         }
00823         free(track_to_pin_map);
00824 }
00825 
00826 /** frees the ipin to track mapping for each physical grid type */
00827 static void
00828 free_type_pin_to_track_map(int***** ipin_to_track_map, t_type_ptr types, int* fc_in)
00829 {
00830         int i;
00831         for(i = 0; i < num_types; i++) {
00832                 free_matrix4(ipin_to_track_map[i], 0, types[i].num_pins - 1,
00833                                   0, types[i].height - 1, 0, 3, 0, sizeof(int));
00834         }
00835         free(ipin_to_track_map);
00836 }
00837 
00838 /** Does the actual work of allocating the rr_graph and filling all the 
00839  * appropriate values.  Everything up to this was just a prelude!      
00840  */
00841 static void
00842 alloc_and_load_rr_graph(INP int num_nodes,
00843                         INP t_rr_node * rr_node,
00844                         INP int num_seg_types,
00845                         INP t_seg_details * seg_details,
00846                         INP boolean * rr_edge_done,
00847                         INP struct s_ivec ****track_to_ipin_lookup,
00848                         INP int *****opin_to_track_map,
00849                         INP struct s_ivec ***switch_block_conn,
00850                         INP struct s_grid_tile **grid,
00851                         INP int nx,
00852                         INP int ny,
00853                         INP int Fs,
00854                         INP short *****sblock_pattern,
00855                         INP int *Fc_out,
00856                         INP int **Fc_xofs,
00857                         INP int **Fc_yofs,
00858                         INP t_ivec *** rr_node_indices,
00859                         INP int nodes_per_chan,
00860                         INP enum e_switch_block_type sb_type,
00861                         INP int delayless_switch,
00862                         INP enum e_directionality directionality,
00863                         INP int wire_to_ipin_switch,
00864                         OUTP boolean * Fc_clipped)
00865 {
00866 
00867     int i, j;
00868     boolean clipped;
00869     int *opin_mux_size = NULL;
00870 
00871 
00872     /* If Fc gets clipped, this will be flagged to true */
00873     *Fc_clipped = FALSE;
00874 
00875     /* Connection SINKS and SOURCES to their pins. */
00876     for(i = 0; i <= (nx + 1); i++)
00877         {
00878             for(j = 0; j <= (ny + 1); j++)
00879                 {
00880                     build_rr_sinks_sources(i, j, rr_node, rr_node_indices,
00881                                            delayless_switch, grid);
00882                 }
00883         }
00884 
00885     /* Build opins */
00886     for(i = 0; i <= (nx + 1); ++i)
00887         {
00888             for(j = 0; j <= (ny + 1); ++j)
00889                 {
00890                     if(BI_DIRECTIONAL == directionality)
00891                         {
00892                             build_bidir_rr_opins(i, j, rr_node,
00893                                                  rr_node_indices,
00894                                                  opin_to_track_map, Fc_out,
00895                                                  rr_edge_done, seg_details,
00896                                                  grid);
00897                         }
00898                     else
00899                         {
00900                             assert(UNI_DIRECTIONAL == directionality);
00901                             build_unidir_rr_opins(i, j, grid, Fc_out,
00902                                                   nodes_per_chan, seg_details,
00903                                                   Fc_xofs, Fc_yofs, rr_node,
00904                                                   rr_edge_done, &clipped,
00905                                                   rr_node_indices);
00906                             if(clipped)
00907                                 {
00908                                     *Fc_clipped = TRUE;
00909                                 }
00910                         }
00911                 }
00912         }
00913 
00914     /* We make a copy of the current fanin values for the nodes to 
00915      * know the number of OPINs driving each mux presently */
00916     opin_mux_size = (int *)my_malloc(sizeof(int) * num_nodes);
00917     for(i = 0; i < num_nodes; ++i)
00918         {
00919             opin_mux_size[i] = rr_node[i].fan_in;
00920         }
00921 
00922     /* Build channels */
00923     assert(Fs % 3 == 0);
00924     for(i = 0; i <= nx; i++)
00925         {
00926             for(j = 0; j <= ny; j++)
00927                 {
00928                     if(i > 0)
00929                         {
00930                             build_rr_xchan(i, j, track_to_ipin_lookup,
00931                                            switch_block_conn,
00932                                            CHANX_COST_INDEX_START,
00933                                            nodes_per_chan, opin_mux_size,
00934                                            sblock_pattern, Fs / 3,
00935                                            seg_details, rr_node_indices,
00936                                            rr_edge_done, rr_node,
00937                                            wire_to_ipin_switch,
00938                                            directionality);
00939                         }
00940                     if(j > 0)
00941                         {
00942                             build_rr_ychan(i, j, track_to_ipin_lookup,
00943                                            switch_block_conn,
00944                                            CHANX_COST_INDEX_START +
00945                                            num_seg_types, nodes_per_chan,
00946                                            opin_mux_size, sblock_pattern,
00947                                            Fs / 3, seg_details,
00948                                            rr_node_indices, rr_edge_done,
00949                                            rr_node, wire_to_ipin_switch,
00950                                            directionality);
00951                         }
00952                 }
00953         }
00954         free(opin_mux_size);
00955 }
00956 
00957 
00958 
00959 static void
00960 build_bidir_rr_opins(INP int i,
00961                      INP int j,
00962                      INOUTP t_rr_node * rr_node,
00963                      INP t_ivec *** rr_node_indices,
00964                      INP int *****opin_to_track_map,
00965                      INP int *Fc_out,
00966                      INP boolean * rr_edge_done,
00967                      INP t_seg_details * seg_details,
00968                      INP struct s_grid_tile **grid)
00969 {
00970 
00971     int ipin, inode, num_edges, Fc, ofs;
00972     t_type_ptr type;
00973     struct s_linked_edge *edge_list, *next;
00974 
00975     /* OPINP edges need to be done at once so let the offset 0
00976      * block do the work. */
00977     if(grid[i][j].offset > 0)
00978         {
00979             return;
00980         }
00981 
00982     type = grid[i][j].type;
00983     Fc = Fc_out[type->index];
00984 
00985     for(ipin = 0; ipin < type->num_pins; ++ipin)
00986         {
00987             /* We only are working with opins so skip non-drivers */
00988             if(type->class_inf[type->pin_class[ipin]].type != DRIVER)
00989                 {
00990                     continue;
00991                 }
00992 
00993             num_edges = 0;
00994             edge_list = NULL;
00995 
00996             for(ofs = 0; ofs < type->height; ++ofs)
00997                 {
00998                     num_edges +=
00999                         get_bidir_opin_connections(i, j + ofs, ipin,
01000                                                    &edge_list,
01001                                                    opin_to_track_map, Fc,
01002                                                    rr_edge_done,
01003                                                    rr_node_indices,
01004                                                    seg_details);
01005                 }
01006 
01007             inode = get_rr_node_index(i, j, OPIN, ipin, rr_node_indices);
01008             alloc_and_load_edges_and_switches(rr_node, inode, num_edges,
01009                                               rr_edge_done, edge_list);
01010                 while(edge_list != NULL) {
01011                         next = edge_list->next;
01012                         free(edge_list);
01013                         edge_list = next;
01014                 }
01015         }
01016 }
01017 
01018 
01019 /** Frees all the routing graph data structures, if they have been       
01020  * allocated.  I use rr_mem_chunk_list_head as a flag to indicate       
01021  * whether or not the graph has been allocated -- if it is not NULL,    
01022  * a routing graph exists and can be freed.  Hence, you can call this   
01023  * routine even if you're not sure of whether a rr_graph exists or not. 
01024  */
01025 void
01026 free_rr_graph(void)
01027 {
01028     int i;
01029 
01030     if(rr_mem_chunk_list_head == NULL)  /* Nothing to free. */
01031         return;
01032 
01033     free_chunk_memory(rr_mem_chunk_list_head);  /* Frees ALL "chunked" data */
01034     rr_mem_chunk_list_head = NULL;      /* No chunks allocated now. */
01035     chunk_bytes_avail = 0;      /* 0 bytes left in current "chunk". */
01036     chunk_next_avail_mem = NULL;        /* No current chunk.                */
01037 
01038     /* Before adding any more free calls here, be sure the data is NOT chunk *
01039      * allocated, as ALL the chunk allocated data is already free!           */
01040 
01041     free(net_rr_terminals);
01042 
01043         for(i = 0; i < num_rr_nodes; i++) {
01044                 if(rr_node[i].edges != NULL) {
01045                         free(rr_node[i].edges);
01046                 }
01047                 if(rr_node[i].switches != NULL) {
01048                         free(rr_node[i].switches);
01049                 }
01050         }
01051 
01052         assert(rr_node_indices);
01053     free_rr_node_indices(rr_node_indices);
01054     free(rr_node);
01055     free(rr_indexed_data);
01056     for(i = 0; i < num_blocks; i++)
01057         {
01058             free(rr_blk_source[i]);
01059         }
01060     free(rr_blk_source);
01061     rr_blk_source = NULL;
01062     net_rr_terminals = NULL;
01063     rr_node = NULL;
01064         rr_node_indices = NULL;
01065     rr_indexed_data = NULL;
01066 }
01067 
01068 
01069 static void
01070 alloc_net_rr_terminals(void)
01071 {
01072     int inet;
01073 
01074     net_rr_terminals = (int **)my_malloc(num_nets * sizeof(int *));
01075 
01076     for(inet = 0; inet < num_nets; inet++)
01077         {
01078             net_rr_terminals[inet] =
01079                 (int *)my_chunk_malloc((clb_net[inet].num_sinks + 1) *
01080                                        sizeof(int), &rr_mem_chunk_list_head,
01081                                        &chunk_bytes_avail,
01082                                        &chunk_next_avail_mem);
01083         }
01084 }
01085 
01086 /** Allocates and loads the net_rr_terminals data structure.  For each net   
01087  * it stores the rr_node index of the SOURCE of the net and all the SINKs   
01088  * of the net.  [0..num_nets-1][0..num_pins-1].  Entry [inet][pnum] stores  
01089  * the rr index corresponding to the SOURCE (opin) or SINK (ipin) of pnum.  
01090  */
01091 void
01092 load_net_rr_terminals(t_ivec *** rr_node_indices)
01093 {
01094 
01095     int inet, ipin, inode, iblk, i, j, k, node_block_pin, iclass;
01096     t_type_ptr type;
01097 
01098     for(inet = 0; inet < num_nets; inet++)
01099         {
01100             for(ipin = 0; ipin <= clb_net[inet].num_sinks; ipin++)
01101                 {
01102                     iblk = clb_net[inet].node_block[ipin];
01103                     i = block[iblk].x;
01104                     j = block[iblk].y;
01105                     k = block[iblk].z;
01106                     type = block[iblk].type;
01107 
01108                     /* In the routing graph, each (x, y) location has unique pins on it
01109                      * so when there is capacity, blocks are packed and their pin numbers
01110                      * are offset to get their actual rr_node */
01111                     node_block_pin = clb_net[inet].node_block_pin[ipin];
01112 
01113                     iclass = type->pin_class[node_block_pin];
01114 
01115                     inode = get_rr_node_index(i, j, (ipin == 0 ? SOURCE : SINK),        /* First pin is driver */
01116                                               iclass, rr_node_indices);
01117                     net_rr_terminals[inet][ipin] = inode;
01118                 }
01119         }
01120 }
01121 
01122 /** Saves the rr_node corresponding to each SOURCE and SINK in each CLB      
01123  * in the FPGA.  Currently only the SOURCE rr_node values are used, and     
01124  * they are used only to reserve pins for locally used OPINs in the router. 
01125  * [0..num_blocks-1][0..num_class-1].  The values for blocks that are pads  
01126  * are NOT valid.                                                           
01127  */
01128 static void
01129 alloc_and_load_rr_clb_source(t_ivec *** rr_node_indices)
01130 {
01131 
01132     int iblk, i, j, iclass, inode;
01133     int class_low, class_high;
01134     t_rr_type rr_type;
01135     t_type_ptr type;
01136 
01137     rr_blk_source = (int **)my_malloc(num_blocks * sizeof(int *));
01138 
01139     for(iblk = 0; iblk < num_blocks; iblk++)
01140         {
01141             type = block[iblk].type;
01142             get_class_range_for_block(iblk, &class_low, &class_high);
01143             rr_blk_source[iblk] =
01144                 (int *)my_malloc(type->num_class * sizeof(int));
01145             for(iclass = 0; iclass < type->num_class; iclass++)
01146                 {
01147                     if(iclass >= class_low && iclass <= class_high)
01148                         {
01149                             i = block[iblk].x;
01150                             j = block[iblk].y;
01151 
01152                             if(type->class_inf[iclass].type == DRIVER)
01153                                 rr_type = SOURCE;
01154                             else
01155                                 rr_type = SINK;
01156 
01157                             inode =
01158                                 get_rr_node_index(i, j, rr_type, iclass,
01159                                                   rr_node_indices);
01160                             rr_blk_source[iblk][iclass] = inode;
01161                         }
01162                     else
01163                         {
01164                             rr_blk_source[iblk][iclass] = OPEN;
01165                         }
01166                 }
01167         }
01168 }
01169 
01170 /* Loads IPIN, SINK, SOURCE, and OPIN. 
01171  * Loads IPINP to SINK edges, and SOURCE to OPINP edges 
01172  */
01173 static void
01174 build_rr_sinks_sources(INP int i,
01175                        INP int j,
01176                        INP t_rr_node * rr_node,
01177                        INP t_ivec *** rr_node_indices,
01178                        INP int delayless_switch,
01179                        INP struct s_grid_tile **grid)
01180 {
01181 
01182     int ipin, iclass, inode, pin_num, to_node, num_edges;
01183     int num_class, num_pins;
01184     t_type_ptr type;
01185     struct s_class *class_inf;
01186     int *pin_class;
01187 
01188     /* Since we share nodes within a large block, only 
01189      * start tile can initialize sinks, sources, and pins */
01190     if(grid[i][j].offset > 0)
01191         return;
01192 
01193     type = grid[i][j].type;
01194     num_class = type->num_class;
01195     class_inf = type->class_inf;
01196     num_pins = type->num_pins;
01197     pin_class = type->pin_class;
01198 
01199     /* SINKS and SOURCE to OPINP edges */
01200     for(iclass = 0; iclass < num_class; iclass++)
01201         {
01202             if(class_inf[iclass].type == DRIVER)
01203                 {               /* SOURCE */
01204                     inode =
01205                         get_rr_node_index(i, j, SOURCE, iclass,
01206                                           rr_node_indices);
01207 
01208                     num_edges = class_inf[iclass].num_pins;
01209                     rr_node[inode].num_edges = num_edges;
01210                     rr_node[inode].edges =
01211                         (int *)my_malloc(num_edges * sizeof(int));
01212                     rr_node[inode].switches =
01213                         (short *)my_malloc(num_edges * sizeof(short));
01214 
01215                     for(ipin = 0; ipin < class_inf[iclass].num_pins; ipin++)
01216                         {
01217                             pin_num = class_inf[iclass].pinlist[ipin];
01218                             to_node =
01219                                 get_rr_node_index(i, j, OPIN, pin_num,
01220                                                   rr_node_indices);
01221                             rr_node[inode].edges[ipin] = to_node;
01222                             rr_node[inode].switches[ipin] = delayless_switch;
01223 
01224                             ++rr_node[to_node].fan_in;
01225                         }
01226 
01227                     rr_node[inode].cost_index = SOURCE_COST_INDEX;
01228                     rr_node[inode].type = SOURCE;
01229                 }
01230             else
01231                 {               /* SINK */
01232                     assert(class_inf[iclass].type == RECEIVER);
01233                     inode =
01234                         get_rr_node_index(i, j, SINK, iclass,
01235                                           rr_node_indices);
01236 
01237                     /* NOTE:  To allow route throughs through clbs, change the lines below to  *
01238                      * make an edge from the input SINK to the output SOURCE.  Do for just the *
01239                      * special case of INPUTS = class 0 and OUTPUTS = class 1 and see what it  *
01240                      * leads to.  If route throughs are allowed, you may want to increase the  *
01241                      * base cost of OPINs and/or SOURCES so they aren't used excessively.      */
01242 
01243                     /* Initialize to unconnected to fix values */
01244                     rr_node[inode].num_edges = 0;
01245                     rr_node[inode].edges = NULL;
01246                     rr_node[inode].switches = NULL;
01247 
01248                     rr_node[inode].cost_index = SINK_COST_INDEX;
01249                     rr_node[inode].type = SINK;
01250                 }
01251 
01252             /* Things common to both SOURCEs and SINKs.   */
01253             rr_node[inode].capacity = class_inf[iclass].num_pins;
01254             rr_node[inode].occ = 0;
01255             rr_node[inode].xlow = i;
01256             rr_node[inode].xhigh = i;
01257             rr_node[inode].ylow = j;
01258             rr_node[inode].yhigh = j + type->height - 1;
01259             rr_node[inode].R = 0;
01260             rr_node[inode].C = 0;
01261             rr_node[inode].ptc_num = iclass;
01262             rr_node[inode].direction = OPEN;
01263             rr_node[inode].drivers = OPEN;
01264         }
01265 
01266     /* Connect IPINS to SINKS and dummy for OPINS */
01267     for(ipin = 0; ipin < num_pins; ipin++)
01268         {
01269             iclass = pin_class[ipin];
01270 
01271             if(class_inf[iclass].type == RECEIVER)
01272                 {
01273                     inode =
01274                         get_rr_node_index(i, j, IPIN, ipin, rr_node_indices);
01275                     to_node =
01276                         get_rr_node_index(i, j, SINK, iclass,
01277                                           rr_node_indices);
01278 
01279                     rr_node[inode].num_edges = 1;
01280                     rr_node[inode].edges = (int *)my_malloc(sizeof(int));
01281                     rr_node[inode].switches =
01282                         (short *)my_malloc(sizeof(short));
01283 
01284                     rr_node[inode].edges[0] = to_node;
01285                     rr_node[inode].switches[0] = delayless_switch;
01286 
01287                     ++rr_node[to_node].fan_in;
01288 
01289                     rr_node[inode].cost_index = IPIN_COST_INDEX;
01290                     rr_node[inode].type = IPIN;
01291                 }
01292             else
01293                 {
01294                     assert(class_inf[iclass].type == DRIVER);
01295 
01296                     inode =
01297                         get_rr_node_index(i, j, OPIN, ipin, rr_node_indices);
01298 
01299                     rr_node[inode].num_edges = 0;
01300                     rr_node[inode].edges = NULL;
01301 
01302                     rr_node[inode].switches = NULL;
01303 
01304                     rr_node[inode].cost_index = OPIN_COST_INDEX;
01305                     rr_node[inode].type = OPIN;
01306                 }
01307 
01308             /* Common to both DRIVERs and RECEIVERs */
01309             rr_node[inode].capacity = 1;
01310             rr_node[inode].occ = 0;
01311             rr_node[inode].xlow = i;
01312             rr_node[inode].xhigh = i;
01313             rr_node[inode].ylow = j;
01314             rr_node[inode].yhigh = j + type->height - 1;
01315             rr_node[inode].C = 0;
01316             rr_node[inode].R = 0;
01317             rr_node[inode].ptc_num = ipin;
01318             rr_node[inode].direction = OPEN;
01319             rr_node[inode].drivers = OPEN;
01320         }
01321 }
01322 
01323 
01324  /* Loads up all the routing resource nodes in the x-directed channel      
01325   * segments starting at (i,j).                                            
01326   */
01327 static void
01328 build_rr_xchan(INP int i,
01329                INP int j,
01330                INP struct s_ivec ****track_to_ipin_lookup,
01331                INP struct s_ivec ***switch_block_conn,
01332                INP int cost_index_offset,
01333                INP int nodes_per_chan,
01334                INP int *opin_mux_size,
01335                INP short *****sblock_pattern,
01336                INP int Fs_per_side,
01337                INP t_seg_details * seg_details,
01338                INP t_ivec *** rr_node_indices,
01339                INOUTP boolean * rr_edge_done,
01340                INOUTP t_rr_node * rr_node,
01341                INP int wire_to_ipin_switch,
01342                INP enum e_directionality directionality)
01343 {
01344 
01345     int itrack, istart, iend, num_edges, inode, length;
01346     struct s_linked_edge *edge_list, *next;
01347 
01348 
01349     for(itrack = 0; itrack < nodes_per_chan; itrack++)
01350         {
01351             istart = get_seg_start(seg_details, itrack, j, i);
01352             iend = get_seg_end(seg_details, itrack, istart, j, nx);
01353 
01354             if(i > istart)
01355                 continue;       /* Not the start of this segment. */
01356 
01357             edge_list = NULL;
01358 
01359             /* First count number of edges and put the edges in a linked list. */
01360             num_edges = 0;
01361 
01362             num_edges += get_track_to_ipins(istart, j, itrack,
01363                                             &edge_list,
01364                                             rr_node_indices,
01365                                             track_to_ipin_lookup,
01366                                             seg_details,
01367                                             CHANX, nx,
01368                                             wire_to_ipin_switch,
01369                                             directionality);
01370 
01371             if(j > 0)
01372                 {
01373                     num_edges += get_track_to_tracks(j, istart, itrack, CHANX,
01374                                                      j, CHANY, nx,
01375                                                      nodes_per_chan,
01376                                                      opin_mux_size,
01377                                                      Fs_per_side,
01378                                                      sblock_pattern,
01379                                                      &edge_list,
01380                                                      seg_details,
01381                                                      directionality,
01382                                                      rr_node_indices,
01383                                                      rr_edge_done,
01384                                                      switch_block_conn);
01385                 }
01386 
01387             if(j < ny)
01388                 {
01389                     num_edges += get_track_to_tracks(j, istart, itrack, CHANX,
01390                                                      j + 1, CHANY, nx,
01391                                                      nodes_per_chan,
01392                                                      opin_mux_size,
01393                                                      Fs_per_side,
01394                                                      sblock_pattern,
01395                                                      &edge_list,
01396                                                      seg_details,
01397                                                      directionality,
01398                                                      rr_node_indices,
01399                                                      rr_edge_done,
01400                                                      switch_block_conn);
01401                 }
01402 
01403             if(istart > 1)
01404                 {
01405                     num_edges += get_track_to_tracks(j, istart, itrack, CHANX,
01406                                                      istart - 1, CHANX, nx,
01407                                                      nodes_per_chan,
01408                                                      opin_mux_size,
01409                                                      Fs_per_side,
01410                                                      sblock_pattern,
01411                                                      &edge_list,
01412                                                      seg_details,
01413                                                      directionality,
01414                                                      rr_node_indices,
01415                                                      rr_edge_done,
01416                                                      switch_block_conn);
01417                 }
01418 
01419             if(iend < nx)
01420                 {
01421                     num_edges += get_track_to_tracks(j, istart, itrack, CHANX,
01422                                                      iend + 1, CHANX, nx,
01423                                                      nodes_per_chan,
01424                                                      opin_mux_size,
01425                                                      Fs_per_side,
01426                                                      sblock_pattern,
01427                                                      &edge_list,
01428                                                      seg_details,
01429                                                      directionality,
01430                                                      rr_node_indices,
01431                                                      rr_edge_done,
01432                                                      switch_block_conn);
01433                 }
01434 
01435 
01436             inode = get_rr_node_index(i, j, CHANX, itrack, rr_node_indices);
01437             alloc_and_load_edges_and_switches(rr_node, inode, num_edges,
01438                                               rr_edge_done, edge_list);
01439 
01440                 while(edge_list != NULL) {
01441                         next = edge_list->next;
01442                         free(edge_list);
01443                         edge_list = next;
01444                 }
01445 
01446             /* Edge arrays have now been built up.  Do everything else.  */
01447             rr_node[inode].cost_index =
01448                 cost_index_offset + seg_details[itrack].index;
01449             rr_node[inode].occ = 0;
01450 
01451             rr_node[inode].capacity = 1;        /* GLOBAL routing handled elsewhere */
01452 
01453             rr_node[inode].xlow = istart;
01454             rr_node[inode].xhigh = iend;
01455             rr_node[inode].ylow = j;
01456             rr_node[inode].yhigh = j;
01457 
01458             length = iend - istart + 1;
01459             rr_node[inode].R = length * seg_details[itrack].Rmetal;
01460             rr_node[inode].C = length * seg_details[itrack].Cmetal;
01461 
01462             rr_node[inode].ptc_num = itrack;
01463             rr_node[inode].type = CHANX;
01464             rr_node[inode].direction = seg_details[itrack].direction;
01465             rr_node[inode].drivers = seg_details[itrack].drivers;
01466         }
01467 }
01468 
01469 
01470 
01471 /** Loads up all the routing resource nodes in the y-directed channel      
01472  * segments starting at (i,j).                                            
01473  */
01474 static void
01475 build_rr_ychan(INP int i,
01476                INP int j,
01477                INP struct s_ivec ****track_to_ipin_lookup,
01478                INP struct s_ivec ***switch_block_conn,
01479                INP int cost_index_offset,
01480                INP int nodes_per_chan,
01481                INP int *opin_mux_size,
01482                INP short *****sblock_pattern,
01483                INP int Fs_per_side,
01484                INP t_seg_details * seg_details,
01485                INP t_ivec *** rr_node_indices,
01486                INP boolean * rr_edge_done,
01487                INOUTP t_rr_node * rr_node,
01488                INP int wire_to_ipin_switch,
01489                INP enum e_directionality directionality)
01490 {
01491 
01492     int itrack, istart, iend, num_edges, inode, length;
01493     struct s_linked_edge *edge_list, *next;
01494 
01495 
01496     for(itrack = 0; itrack < nodes_per_chan; itrack++)
01497         {
01498             istart = get_seg_start(seg_details, itrack, i, j);
01499             iend = get_seg_end(seg_details, itrack, istart, i, ny);
01500 
01501             if(j > istart)
01502                 continue;       /* Not the start of this segment. */
01503 
01504             edge_list = NULL;
01505 
01506             /* First count number of edges and put the edges in a linked list. */
01507             num_edges = 0;
01508 
01509             num_edges += get_track_to_ipins(istart, i, itrack,
01510                                             &edge_list,
01511                                             rr_node_indices,
01512                                             track_to_ipin_lookup,
01513                                             seg_details,
01514                                             CHANY, ny,
01515                                             wire_to_ipin_switch,
01516                                             directionality);
01517 
01518             if(i > 0)
01519                 {
01520                     num_edges += get_track_to_tracks(i, istart, itrack, CHANY,
01521                                                      i, CHANX, ny,
01522                                                      nodes_per_chan,
01523                                                      opin_mux_size,
01524                                                      Fs_per_side,
01525                                                      sblock_pattern,
01526                                                      &edge_list,
01527                                                      seg_details,
01528                                                      directionality,
01529                                                      rr_node_indices,
01530                                                      rr_edge_done,
01531                                                      switch_block_conn);
01532                 }
01533 
01534             if(i < nx)
01535                 {
01536                     num_edges += get_track_to_tracks(i, istart, itrack, CHANY,
01537                                                      i + 1, CHANX, ny,
01538                                                      nodes_per_chan,
01539                                                      opin_mux_size,
01540                                                      Fs_per_side,
01541                                                      sblock_pattern,
01542                                                      &edge_list,
01543                                                      seg_details,
01544                                                      directionality,
01545                                                      rr_node_indices,
01546                                                      rr_edge_done,
01547                                                      switch_block_conn);
01548                 }
01549 
01550             if(istart > 1)
01551                 {
01552                     num_edges += get_track_to_tracks(i, istart, itrack, CHANY,
01553                                                      istart - 1, CHANY, ny,
01554                                                      nodes_per_chan,
01555                                                      opin_mux_size,
01556                                                      Fs_per_side,
01557                                                      sblock_pattern,
01558                                                      &edge_list,
01559                                                      seg_details,
01560                                                      directionality,
01561                                                      rr_node_indices,
01562                                                      rr_edge_done,
01563                                                      switch_block_conn);
01564                 }
01565 
01566             if(iend < ny)
01567                 {
01568                     num_edges += get_track_to_tracks(i, istart, itrack, CHANY,
01569                                                      iend + 1, CHANY, ny,
01570                                                      nodes_per_chan,
01571                                                      opin_mux_size,
01572                                                      Fs_per_side,
01573                                                      sblock_pattern,
01574                                                      &edge_list,
01575                                                      seg_details,
01576                                                      directionality,
01577                                                      rr_node_indices,
01578                                                      rr_edge_done,
01579                                                      switch_block_conn);
01580                 }
01581 
01582 
01583             inode = get_rr_node_index(i, j, CHANY, itrack, rr_node_indices);
01584             alloc_and_load_edges_and_switches(rr_node, inode, num_edges,
01585                                               rr_edge_done, edge_list);
01586 
01587                 while(edge_list != NULL) {
01588                         next = edge_list->next;
01589                         free(edge_list);
01590                         edge_list = next;
01591                 }
01592 
01593             /* Edge arrays have now been built up.  Do everything else.  */
01594             rr_node[inode].cost_index =
01595                 cost_index_offset + seg_details[itrack].index;
01596             rr_node[inode].occ = 0;
01597 
01598             rr_node[inode].capacity = 1;        /* GLOBAL routing handled elsewhere */
01599 
01600             rr_node[inode].xlow = i;
01601             rr_node[inode].xhigh = i;
01602             rr_node[inode].ylow = istart;
01603             rr_node[inode].yhigh = iend;
01604 
01605             length = iend - istart + 1;
01606             rr_node[inode].R = length * seg_details[itrack].Rmetal;
01607             rr_node[inode].C = length * seg_details[itrack].Cmetal;
01608 
01609             rr_node[inode].ptc_num = itrack;
01610             rr_node[inode].type = CHANY;
01611             rr_node[inode].direction = seg_details[itrack].direction;
01612             rr_node[inode].drivers = seg_details[itrack].drivers;
01613         }
01614 }
01615 
01616 void
01617 watch_edges(int inode,
01618             t_linked_edge * edge_list_head)
01619 {
01620     t_linked_edge *list_ptr;
01621     int i, to_node;
01622 
01623     list_ptr = edge_list_head;
01624     i = 0;
01625 
01626     printf("!!! Watching Node %d !!!!\n", inode);
01627     print_rr_node(stdout, rr_node, inode);
01628     printf("Currently connects to: \n");
01629     while(list_ptr != NULL)
01630         {
01631             to_node = list_ptr->edge;
01632             print_rr_node(stdout, rr_node, to_node);
01633             list_ptr = list_ptr->next;
01634             i++;
01635         }
01636 }
01637 
01638 /** Sets up all the edge related information for rr_node inode (num_edges,  
01639  * the edges array and the switches array).  The edge_list_head points to  
01640  * a list of the num_edges edges and switches to put in the arrays.  This  
01641  * linked list is freed by this routine. This routine also resets the      
01642  * rr_edge_done array for the next rr_node (i.e. set it so that no edges   
01643  * are marked as having been seen before).                                 
01644  */
01645 void
01646 alloc_and_load_edges_and_switches(INP t_rr_node * rr_node,
01647                                   INP int inode,
01648                                   INP int num_edges,
01649                                   INOUTP boolean * rr_edge_done,
01650                                   INP t_linked_edge * edge_list_head)
01651 {
01652 
01653     t_linked_edge *list_ptr, *prev_ptr;
01654     int i;
01655 
01656     /* Check we aren't overwriting edges */
01657     assert(rr_node[inode].num_edges < 1);
01658     assert(NULL == rr_node[inode].edges);
01659     assert(NULL == rr_node[inode].switches);
01660 
01661     rr_node[inode].num_edges = num_edges;
01662     rr_node[inode].edges = (int *)my_malloc(num_edges * sizeof(int));
01663     rr_node[inode].switches = (short *)my_malloc(num_edges * sizeof(short));
01664 
01665     i = 0;
01666     list_ptr = edge_list_head;
01667     while(list_ptr && (i < num_edges))
01668         {
01669             rr_node[inode].edges[i] = list_ptr->edge;
01670             rr_node[inode].switches[i] = list_ptr->iswitch;
01671 
01672             ++rr_node[list_ptr->edge].fan_in;
01673 
01674             /* Unmark the edge since we are done considering fanout from node. */
01675             rr_edge_done[list_ptr->edge] = FALSE;
01676 
01677             prev_ptr = list_ptr;
01678             list_ptr = list_ptr->next;
01679             ++i;
01680         }
01681     assert(list_ptr == NULL);
01682     assert(i == num_edges);
01683 }
01684 
01685 
01686 
01687 
01688 
01689 
01690 
01691 static int ****
01692 alloc_and_load_pin_to_track_map(INP enum e_pin_type pin_type,
01693                                 INP int nodes_per_chan,
01694                                 INP int Fc,
01695                                 INP t_type_ptr Type,
01696                                 INP boolean perturb_switch_pattern,
01697                                 INP enum e_directionality directionality)
01698 {
01699 
01700     int **num_dir;              /* [0..height][0..3] Number of *physical* pins on each side.          */
01701     int ***dir_list;            /* [0..height][0..3][0..num_pins-1] list of pins of correct type  *
01702                                  * * on each side. Max possible space alloced for simplicity */
01703 
01704     int i, j, k, iside, ipin, iclass, num_phys_pins, pindex, ioff;
01705     int *pin_num_ordering, *side_ordering, *offset_ordering;
01706     int **num_done_per_dir;     /* [0..height][0..3] */
01707     int ****tracks_connected_to_pin;    /* [0..num_pins-1][0..height][0..3][0..Fc-1] */
01708 
01709     /* NB:  This wastes some space.  Could set tracks_..._pin[ipin][ioff][iside] = 
01710      * NULL if there is no pin on that side, or that pin is of the wrong type. 
01711      * Probably not enough memory to worry about, esp. as it's temporary.      
01712      * If pin ipin on side iside does not exist or is of the wrong type,       
01713      * tracks_connected_to_pin[ipin][iside][0] = OPEN.                               */
01714 
01715     if(Type->num_pins < 1)
01716         {
01717             return NULL;
01718         }
01719 
01720     tracks_connected_to_pin = (int ****)
01721         alloc_matrix4(0, Type->num_pins - 1,
01722                       0, Type->height - 1, 0, 3, 0, Fc - 1, sizeof(int));
01723 
01724     for(ipin = 0; ipin < Type->num_pins; ipin++)
01725         {
01726             for(ioff = 0; ioff < Type->height; ioff++)
01727                 {
01728                     for(iside = 0; iside < 4; iside++)
01729                         {
01730                             for(i = 0; i < Fc; ++i)
01731                                 {
01732                                     tracks_connected_to_pin[ipin][ioff][iside][i] = OPEN;       /* Unconnected. */
01733                                 }
01734                         }
01735                 }
01736         }
01737 
01738     num_dir = (int **)alloc_matrix(0, Type->height - 1, 0, 3, sizeof(int));
01739     dir_list =
01740         (int ***)alloc_matrix3(0, Type->height - 1, 0, 3, 0,
01741                                Type->num_pins - 1, sizeof(int));
01742 
01743     /* Defensive coding.  Try to crash hard if I use an unset entry.  */
01744     for(i = 0; i < Type->height; i++)
01745         for(j = 0; j < 4; j++)
01746             for(k = 0; k < Type->num_pins; k++)
01747                 dir_list[i][j][k] = (-1);
01748 
01749     for(i = 0; i < Type->height; i++)
01750         for(j = 0; j < 4; j++)
01751             num_dir[i][j] = 0;
01752 
01753     for(ipin = 0; ipin < Type->num_pins; ipin++)
01754         {
01755             iclass = Type->pin_class[ipin];
01756             if(Type->class_inf[iclass].type != pin_type)        /* Doing either ipins OR opins */
01757                 continue;
01758 
01759             /* Pins connecting only to global resources get no switches -> keeps the    *
01760              * area model accurate.                                                     */
01761 
01762             if(Type->is_global_pin[ipin])
01763                 continue;
01764             for(ioff = 0; ioff < Type->height; ioff++)
01765                 {
01766                     for(iside = 0; iside < 4; iside++)
01767                         {
01768                             if(Type->pinloc[ioff][iside][ipin] == 1)
01769                                 {
01770                                     dir_list[ioff][iside][num_dir[ioff]
01771                                                           [iside]] = ipin;
01772                                     num_dir[ioff][iside]++;
01773                                 }
01774                         }
01775                 }
01776         }
01777 
01778     num_phys_pins = 0;
01779     for(ioff = 0; ioff < Type->height; ioff++)
01780         {
01781             for(iside = 0; iside < 4; iside++)
01782                 num_phys_pins += num_dir[ioff][iside];  /* Num. physical pins per type */
01783         }
01784     num_done_per_dir =
01785         (int **)alloc_matrix(0, Type->height - 1, 0, 3, sizeof(int));
01786     for(ioff = 0; ioff < Type->height; ioff++)
01787         {
01788             for(iside = 0; iside < 4; iside++)
01789                 {
01790                     num_done_per_dir[ioff][iside] = 0;
01791                 }
01792         }
01793     pin_num_ordering = (int *)my_malloc(num_phys_pins * sizeof(int));
01794     side_ordering = (int *)my_malloc(num_phys_pins * sizeof(int));
01795     offset_ordering = (int *)my_malloc(num_phys_pins * sizeof(int));
01796 
01797     /* Connection block I use distributes pins evenly across the tracks      *
01798      * of ALL sides of the clb at once.  Ensures that each pin connects      *
01799      * to spaced out tracks in its connection block, and that the other      *
01800      * pins (potentially in other C blocks) connect to the remaining tracks  *
01801      * first.  Doesn't matter for large Fc, but should make a fairly         *
01802      * good low Fc block that leverages the fact that usually lots of pins   *
01803      * are logically equivalent.                                             */
01804 
01805     iside = LEFT;
01806     ioff = Type->height - 1;
01807     ipin = 0;
01808     pindex = -1;
01809 
01810     while(ipin < num_phys_pins)
01811         {
01812             if(iside == TOP)
01813                 {
01814                     iside = RIGHT;
01815                 }
01816             else if(iside == RIGHT)
01817                 {
01818                     if(ioff <= 0)
01819                         {
01820                             iside = BOTTOM;
01821                         }
01822                     else
01823                         {
01824                             ioff--;
01825                         }
01826                 }
01827             else if(iside == BOTTOM)
01828                 {
01829                     iside = LEFT;
01830                 }
01831             else
01832                 {
01833                     assert(iside == LEFT);
01834                     if(ioff >= Type->height - 1)
01835                         {
01836                             pindex++;
01837                             iside = TOP;
01838                         }
01839                     else
01840                         {
01841                             ioff++;
01842                         }
01843                 }
01844 
01845             assert(pindex < num_phys_pins);     /* Number of physical pins bounds number of logical pins */
01846 
01847             if(num_done_per_dir[ioff][iside] >= num_dir[ioff][iside])
01848                 continue;
01849             pin_num_ordering[ipin] = dir_list[ioff][iside][pindex];
01850             side_ordering[ipin] = iside;
01851             offset_ordering[ipin] = ioff;
01852             assert(Type->pinloc[ioff][iside][dir_list[ioff][iside][pindex]]);
01853             num_done_per_dir[ioff][iside]++;
01854             ipin++;
01855         }
01856 
01857     if(perturb_switch_pattern)
01858         {
01859             load_perturbed_switch_pattern(Type,
01860                                           tracks_connected_to_pin,
01861                                           num_phys_pins,
01862                                           pin_num_ordering,
01863                                           side_ordering,
01864                                           offset_ordering,
01865                                           nodes_per_chan, Fc, directionality);
01866         }
01867     else
01868         {
01869             load_uniform_switch_pattern(Type,
01870                                         tracks_connected_to_pin,
01871                                         num_phys_pins,
01872                                         pin_num_ordering,
01873                                         side_ordering,
01874                                         offset_ordering,
01875                                         nodes_per_chan, Fc, directionality);
01876         }
01877     check_all_tracks_reach_pins(Type, tracks_connected_to_pin,
01878                                 nodes_per_chan, Fc, pin_type);
01879 
01880     /* Free all temporary storage. */
01881     free_matrix(num_dir, 0, Type->height - 1, 0, sizeof(int));
01882     free_matrix3(dir_list, 0, Type->height - 1, 0, 3, 0, sizeof(int));
01883     free_matrix(num_done_per_dir, 0, Type->height - 1, 0, sizeof(int));
01884     free(pin_num_ordering);
01885     free(side_ordering);
01886     free(offset_ordering);
01887 
01888     return tracks_connected_to_pin;
01889 }
01890 
01891 
01892 
01893 /** Loads the tracks_connected_to_pin array with an even distribution of     
01894  * switches across the tracks for each pin.  For example, each pin connects 
01895  * to every 4.3rd track in a channel, with exactly which tracks a pin      
01896  * connects to staggered from pin to pin.                                   
01897  */
01898 static void
01899 load_uniform_switch_pattern(INP t_type_ptr type,
01900                             INOUTP int ****tracks_connected_to_pin,
01901                             INP int num_phys_pins,
01902                             INP int *pin_num_ordering,
01903                             INP int *side_ordering,
01904                             INP int *offset_ordering,
01905                             INP int nodes_per_chan,
01906                             INP int Fc,
01907                             enum e_directionality directionality)
01908 {
01909 
01910     int i, j, ipin, iside, ioff, itrack, k;
01911     float f_track, fc_step;
01912     int group_size;
01913     float step_size;
01914 
01915     /* Uni-directional drive is implemented to ensure no directional bias and this means 
01916      * two important comments noted below                                                */
01917     /* 1. Spacing should be (W/2)/(Fc/2), and step_size should be spacing/(num_phys_pins),
01918      *    and lay down 2 switches on an adjacent pair of tracks at a time to ensure
01919      *    no directional bias. Basically, treat W (even) as W/2 pairs of tracks, and
01920      *    assign switches to a pair at a time. Can do this because W is guaranteed to 
01921      *    be even-numbered; however same approach cannot be applied to Fc_out pattern
01922      *    when L > 1 and W <> 2L multiple. 
01923      *
01924      * 2. This generic pattern should be considered the tileable physical layout,
01925      *    meaning all track # here are physical #'s,
01926      *    so later must use vpr_to_phy conversion to find actual logical #'s to connect.
01927      *    This also means I will not use get_output_block_companion_track to ensure
01928      *    no bias, since that describes a logical # -> that would confuse people.  */
01929 
01930     step_size = (float)nodes_per_chan / (float)(Fc * num_phys_pins);
01931 
01932     if(directionality == BI_DIRECTIONAL)
01933         {
01934             group_size = 1;
01935         }
01936     else
01937         {
01938             assert(directionality == UNI_DIRECTIONAL);
01939             group_size = 2;
01940         }
01941 
01942     assert((nodes_per_chan % group_size == 0) && (Fc % group_size == 0));
01943 
01944     fc_step = (float)nodes_per_chan / (float)Fc;
01945 
01946     for(i = 0; i < num_phys_pins; i++)
01947         {
01948             ipin = pin_num_ordering[i];
01949             iside = side_ordering[i];
01950             ioff = offset_ordering[i];
01951 
01952             /* Bi-directional treats each track separately, uni-directional works with pairs of tracks */
01953             for(j = 0; j < (Fc / group_size); j++)
01954                 {
01955                     f_track = (i * step_size) + (j * fc_step);
01956                     itrack = ((int)f_track) * group_size;
01957 
01958                     /* Catch possible floating point round error */
01959                     itrack = min(itrack, nodes_per_chan - group_size);
01960 
01961                     /* Assign the group of tracks for the Fc pattern */
01962                     for(k = 0; k < group_size; ++k)
01963                         {
01964                             tracks_connected_to_pin[ipin][ioff][iside]
01965                                 [group_size * j + k] = itrack + k;
01966                         }
01967                 }
01968         }
01969 }
01970 
01971 /** Loads the tracks_connected_to_pin array with an unevenly distributed     
01972  * set of switches across the channel.  This is done for inputs when        
01973  * Fc_input = Fc_output to avoid creating "pin domains" -- certain output   
01974  * pins being able to talk only to certain input pins because their switch  
01975  * patterns exactly line up.  Distribute Fc/2 + 1 switches over half the    
01976  * channel and Fc/2 - 1 switches over the other half to make the switch      
01977  * pattern different from the uniform one of the outputs.  Also, have half  
01978  * the pins put the "dense" part of their connections in the first half of  
01979  * the channel and the other half put the "dense" part in the second half,  
01980  * to make sure each track can connect to about the same number of ipins.   
01981  */
01982 static void
01983 load_perturbed_switch_pattern(INP t_type_ptr type,
01984                               INOUTP int ****tracks_connected_to_pin,
01985                               INP int num_phys_pins,
01986                               INP int *pin_num_ordering,
01987                               INP int *side_ordering,
01988                               INP int *offset_ordering,
01989                               INP int nodes_per_chan,
01990                               INP int Fc,
01991                               enum e_directionality directionality)
01992 {
01993 
01994     int i, j, ipin, iside, itrack, ihalf, iconn, ioff;
01995     int Fc_dense, Fc_sparse, Fc_half[2];
01996     float f_track, spacing_dense, spacing_sparse, spacing[2];
01997     float step_size;
01998 
01999     assert(directionality == BI_DIRECTIONAL);
02000 
02001     step_size = (float)nodes_per_chan / (float)(Fc * num_phys_pins);
02002 
02003     Fc_dense = (Fc / 2) + 1;
02004     Fc_sparse = Fc - Fc_dense;  /* Works for even or odd Fc */
02005 
02006     spacing_dense = (float)nodes_per_chan / (float)(2 * Fc_dense);
02007     spacing_sparse = (float)nodes_per_chan / (float)(2 * Fc_sparse);
02008 
02009     for(i = 0; i < num_phys_pins; i++)
02010         {
02011             ipin = pin_num_ordering[i];
02012             iside = side_ordering[i];
02013             ioff = offset_ordering[i];
02014 
02015             /* Flip every pin to balance switch density */
02016             spacing[i % 2] = spacing_dense;
02017             Fc_half[i % 2] = Fc_dense;
02018             spacing[(i + 1) % 2] = spacing_sparse;
02019             Fc_half[(i + 1) % 2] = Fc_sparse;
02020 
02021             f_track = i * step_size;    /* Start point.  Staggered from pin to pin */
02022             iconn = 0;
02023 
02024             for(ihalf = 0; ihalf < 2; ihalf++)
02025                 {               /* For both dense and sparse halves. */
02026                     for(j = 0; j < Fc_half[ihalf]; ++j)
02027                         {
02028                             itrack = (int)f_track;
02029 
02030                             /* Can occasionally get wraparound due to floating point rounding. 
02031                                    This is okay because the starting position > 0 when this occurs
02032                                    so connection is valid and fine */
02033                             itrack = itrack % nodes_per_chan;
02034                             tracks_connected_to_pin[ipin][ioff][iside][iconn]
02035                                 = itrack;
02036 
02037                             f_track += spacing[ihalf];
02038                             iconn++;
02039                         }
02040                 }
02041         }                       /* End for all physical pins. */
02042 }
02043 
02044 
02045 /** Checks that all tracks can be reached by some pin.   */
02046 static void
02047 check_all_tracks_reach_pins(t_type_ptr type,
02048                             int ****tracks_connected_to_pin,
02049                             int nodes_per_chan,
02050                             int Fc,
02051                             enum e_pin_type ipin_or_opin)
02052 {
02053     int iconn, iside, itrack, ipin, ioff;
02054     int *num_conns_to_track;    /* [0..nodes_per_chan-1] */
02055 
02056         assert(nodes_per_chan > 0);    
02057 
02058     num_conns_to_track = (int *)my_calloc(nodes_per_chan, sizeof(int));
02059 
02060     for(ipin = 0; ipin < type->num_pins; ipin++)
02061         {
02062             for(ioff = 0; ioff < type->height; ioff++)
02063                 {
02064                     for(iside = 0; iside < 4; iside++)
02065                         {
02066                             if(tracks_connected_to_pin[ipin][ioff][iside][0]
02067                                != OPEN)
02068                                 {       /* Pin exists */
02069                                     for(iconn = 0; iconn < Fc; iconn++)
02070                                         {
02071                                             itrack =
02072                                                 tracks_connected_to_pin[ipin]
02073                                                 [ioff][iside][iconn];
02074                                             num_conns_to_track[itrack]++;
02075                                         }
02076                                 }
02077                         }
02078                 }
02079         }
02080 
02081         for(itrack = 0; itrack < nodes_per_chan; itrack++)
02082         {
02083             if(num_conns_to_track[itrack] <= 0)
02084                 {
02085                     printf
02086                         ("Warning (check_all_tracks_reach_pins):  track %d does not \n"
02087                          "\tconnect to any CLB ", itrack);
02088 
02089                     if(ipin_or_opin == DRIVER)
02090                         printf("OPINs.\n");
02091                     else
02092                         printf("IPINs.\n");
02093                 }
02094         }
02095 
02096     free(num_conns_to_track);
02097 }
02098 
02099 /** Allocates and loads the track to ipin lookup for each physical grid type. This
02100  * is the same information as the ipin_to_track map but accessed in a different way. 
02101  */
02102 static struct s_ivec ***
02103 alloc_and_load_track_to_pin_lookup(INP int ****pin_to_track_map,
02104                                    INP int Fc,
02105                                    INP int height,
02106                                    INP int num_pins,
02107                                    INP int nodes_per_chan)
02108 {
02109     int ipin, iside, itrack, iconn, ioff, pin_counter;
02110     struct s_ivec ***track_to_pin_lookup;
02111 
02112     /* [0..nodes_per_chan-1][0..height][0..3].  For each track number it stores a vector  
02113      * for each of the four sides.  x-directed channels will use the TOP and   
02114      * BOTTOM vectors to figure out what clb input pins they connect to above  
02115      * and below them, respectively, while y-directed channels use the LEFT    
02116      * and RIGHT vectors.  Each vector contains an nelem field saying how many 
02117      * ipins it connects to.  The list[0..nelem-1] array then gives the pin    
02118      * numbers.                                                                */
02119 
02120     /* Note that a clb pin that connects to a channel on its RIGHT means that  *
02121      * that channel connects to a clb pin on its LEFT.  The convention used    *
02122      * here is always in the perspective of the CLB                            */
02123 
02124     if(num_pins < 1)
02125         {
02126             return NULL;
02127         }
02128 
02129     /* Alloc and zero the the lookup table */
02130     track_to_pin_lookup =
02131         (struct s_ivec ***)alloc_matrix3(0, nodes_per_chan - 1, 0,
02132                                          height - 1, 0, 3,
02133                                          sizeof(struct s_ivec));
02134     for(itrack = 0; itrack < nodes_per_chan; itrack++)
02135         {
02136             for(ioff = 0; ioff < height; ioff++)
02137                 {
02138                     for(iside = 0; iside < 4; iside++)
02139                         {
02140                             track_to_pin_lookup[itrack][ioff][iside].nelem =
02141                                 0;
02142                             track_to_pin_lookup[itrack][ioff][iside].list =
02143                                 NULL;
02144                         }
02145                 }
02146         }
02147 
02148     /* Counting pass.  */
02149     for(ipin = 0; ipin < num_pins; ipin++)
02150         {
02151             for(ioff = 0; ioff < height; ioff++)
02152                 {
02153                     for(iside = 0; iside < 4; iside++)
02154                         {
02155                             if(pin_to_track_map[ipin][ioff][iside][0] == OPEN)
02156                                 continue;
02157 
02158                             for(iconn = 0; iconn < Fc; iconn++)
02159                                 {
02160                                     itrack =
02161                                         pin_to_track_map[ipin][ioff][iside]
02162                                         [iconn];
02163                                     track_to_pin_lookup[itrack][ioff][iside].
02164                                         nelem++;
02165                                 }
02166                         }
02167                 }
02168         }
02169 
02170     /* Allocate space.  */
02171     for(itrack = 0; itrack < nodes_per_chan; itrack++)
02172         {
02173             for(ioff = 0; ioff < height; ioff++)
02174                 {
02175                     for(iside = 0; iside < 4; iside++)
02176                         {
02177                             track_to_pin_lookup[itrack][ioff][iside].list = NULL;       /* Defensive code */
02178                             if(track_to_pin_lookup[itrack][ioff][iside].
02179                                nelem != 0)
02180                                 {
02181                                     track_to_pin_lookup[itrack][ioff][iside].
02182                                         list =
02183                                         (int *)
02184                                         my_malloc(track_to_pin_lookup[itrack]
02185                                                   [ioff][iside].nelem *
02186                                                   sizeof(int));
02187                                     track_to_pin_lookup[itrack][ioff][iside].
02188                                         nelem = 0;
02189                                 }
02190                         }
02191                 }
02192         }
02193 
02194     /* Loading pass. */
02195     for(ipin = 0; ipin < num_pins; ipin++)
02196         {
02197             for(ioff = 0; ioff < height; ioff++)
02198                 {
02199                     for(iside = 0; iside < 4; iside++)
02200                         {
02201                             if(pin_to_track_map[ipin][ioff][iside][0] == OPEN)
02202                                 continue;
02203 
02204                             for(iconn = 0; iconn < Fc; iconn++)
02205                                 {
02206                                     itrack =
02207                                         pin_to_track_map[ipin][ioff][iside]
02208                                         [iconn];
02209                                     pin_counter =
02210                                         track_to_pin_lookup[itrack][ioff]
02211                                         [iside].nelem;
02212                                     track_to_pin_lookup[itrack][ioff][iside].
02213                                         list[pin_counter] = ipin;
02214                                     track_to_pin_lookup[itrack][ioff][iside].
02215                                         nelem++;
02216                                 }
02217                         }
02218                 }
02219         }
02220 
02221     return track_to_pin_lookup;
02222 }
02223 
02224 /** A utility routine to dump the contents of the routing resource graph   
02225  * (everything -- connectivity, occupancy, cost, etc.) into a file.  Used 
02226  * only for debugging.                                                    
02227  */
02228 void
02229 dump_rr_graph(INP const char *file_name)
02230 {
02231 
02232     int inode;
02233     FILE *fp;
02234 
02235     fp = my_fopen(file_name, "w", 0);
02236 
02237     for(inode = 0; inode < num_rr_nodes; inode++)
02238         {
02239             print_rr_node(fp, rr_node, inode);
02240             fprintf(fp, "\n");
02241         }
02242 
02243 #if 0
02244     fprintf(fp, "\n\n%d rr_indexed_data entries.\n\n", num_rr_indexed_data);
02245 
02246     for(index = 0; index < num_rr_indexed_data; index++)
02247         {
02248             print_rr_indexed_data(fp, index);
02249             fprintf(fp, "\n");
02250         }
02251 #endif
02252 
02253     fclose(fp);
02254 }
02255 
02256 
02257 /** Prints all the data about node inode to file fp.                    */
02258 void
02259 print_rr_node(FILE * fp,
02260               t_rr_node * rr_node,
02261               int inode)
02262 {
02263 
02264     static const char *name_type[] = {
02265         "SOURCE", "SINK", "IPIN", "OPIN", "CHANX", "CHANY", "INTRA_CLUSTER_EDGE"
02266     };
02267     static const char *direction_name[] = {
02268         "OPEN", "INC_DIRECTION", "DEC_DIRECTION", "BI_DIRECTION"
02269     };
02270     static const char *drivers_name[] = {
02271         "OPEN", "MULTI_BUFFER", "SINGLE"
02272     };
02273 
02274     t_rr_type rr_type;
02275     int iconn;
02276 
02277     rr_type = rr_node[inode].type;
02278 
02279     /* Make sure we don't overrun const arrays */
02280     assert(rr_type < (sizeof(name_type) / sizeof(char *)));
02281     assert((rr_node[inode].direction + 1) <
02282            (sizeof(direction_name) / sizeof(char *)));
02283     assert((rr_node[inode].drivers + 1) <
02284            (sizeof(drivers_name) / sizeof(char *)));
02285 
02286     fprintf(fp, "Node: %d %s ", inode, name_type[rr_type]);
02287     if((rr_node[inode].xlow == rr_node[inode].xhigh) &&
02288        (rr_node[inode].ylow == rr_node[inode].yhigh))
02289         {
02290             fprintf(fp, "(%d, %d) ", rr_node[inode].xlow,
02291                     rr_node[inode].ylow);
02292         }
02293     else
02294         {
02295             fprintf(fp, "(%d, %d) to (%d, %d) ", rr_node[inode].xlow,
02296                     rr_node[inode].ylow, rr_node[inode].xhigh,
02297                     rr_node[inode].yhigh);
02298         }
02299     fprintf(fp, "Ptc_num: %d ", rr_node[inode].ptc_num);
02300     fprintf(fp, "Direction: %s ",
02301             direction_name[rr_node[inode].direction + 1]);
02302     fprintf(fp, "Drivers: %s ", drivers_name[rr_node[inode].drivers + 1]);
02303     fprintf(fp, "\n");
02304 
02305     fprintf(fp, "%d edge(s):", rr_node[inode].num_edges);
02306     for(iconn = 0; iconn < rr_node[inode].num_edges; iconn++)
02307         fprintf(fp, " %d", rr_node[inode].edges[iconn]);
02308     fprintf(fp, "\n");
02309 
02310         fprintf(fp, "Switch types:");
02311         for(iconn = 0; iconn < rr_node[inode].num_edges; iconn++)
02312                 fprintf(fp, " %d", rr_node[inode].switches[iconn]);
02313         fprintf(fp, "\n");
02314 
02315     fprintf(fp, "Occ: %d  Capacity: %d\n", rr_node[inode].occ,
02316             rr_node[inode].capacity);
02317         if(rr_type != INTRA_CLUSTER_EDGE) {
02318                 fprintf(fp, "R: %g  C: %g\n", rr_node[inode].R, rr_node[inode].C);
02319         }
02320     fprintf(fp, "Cost_index: %d\n", rr_node[inode].cost_index);
02321 }
02322 
02323 
02324 /** Prints all the rr_indexed_data of index to file fp.   */
02325 void
02326 print_rr_indexed_data(FILE * fp,
02327                       int index)
02328 {
02329 
02330     fprintf(fp, "Index: %d\n", index);
02331 
02332     fprintf(fp, "ortho_cost_index: %d  ",
02333             rr_indexed_data[index].ortho_cost_index);
02334     fprintf(fp, "base_cost: %g  ", rr_indexed_data[index].saved_base_cost);
02335     fprintf(fp, "saved_base_cost: %g\n",
02336             rr_indexed_data[index].saved_base_cost);
02337 
02338     fprintf(fp, "Seg_index: %d  ", rr_indexed_data[index].seg_index);
02339     fprintf(fp, "inv_length: %g\n", rr_indexed_data[index].inv_length);
02340 
02341     fprintf(fp, "T_linear: %g  ", rr_indexed_data[index].T_linear);
02342     fprintf(fp, "T_quadratic: %g  ", rr_indexed_data[index].T_quadratic);
02343     fprintf(fp, "C_load: %g\n", rr_indexed_data[index].C_load);
02344 }
02345 
02346 
02347 /** This routine returns a list of the opins rr_nodes on each
02348  * side/offset of the block. You must free the result with
02349  * free_matrix. 
02350  */
02351 static void
02352 build_unidir_rr_opins(INP int i,
02353                       INP int j,
02354                       INP struct s_grid_tile **grid,
02355                       INP int *Fc_out,
02356                       INP int nodes_per_chan,
02357                       INP t_seg_details * seg_details,
02358                       INOUTP int **Fc_xofs,
02359                       INOUTP int **Fc_yofs,
02360                       INOUTP t_rr_node * rr_node,
02361                       INOUTP boolean * rr_edge_done,
02362                       OUTP boolean * Fc_clipped,
02363                       INP t_ivec *** rr_node_indices)
02364 {
02365     t_type_ptr type;
02366     int ipin, iclass, ofs, chan, seg, max_len, inode;
02367     enum e_side side;
02368     t_rr_type chan_type;
02369     t_linked_edge *edge_list = NULL, *next;
02370     boolean clipped, vert, pos_dir;
02371     int num_edges;
02372     int **Fc_ofs;
02373 
02374     *Fc_clipped = FALSE;
02375 
02376     /* Only the base block of a set should use this function */
02377     if(grid[i][j].offset > 0)
02378         {
02379             return;
02380         }
02381 
02382     type = grid[i][j].type;
02383 
02384     /* Go through each pin and find its fanout. */
02385     for(ipin = 0; ipin < type->num_pins; ++ipin)
02386         {
02387             /* Skip global pins and ipins */
02388             iclass = type->pin_class[ipin];
02389             if(type->class_inf[iclass].type != DRIVER)
02390                 {
02391                     continue;
02392                 }
02393             if(type->is_global_pin[ipin])
02394                 {
02395                     continue;
02396                 }
02397 
02398             num_edges = 0;
02399             edge_list = NULL;
02400             for(ofs = 0; ofs < type->height; ++ofs)
02401                 {
02402                     for(side = 0; side < 4; ++side)
02403                         {
02404                             /* Can't do anything if pin isn't at this location */
02405                             if(0 == type->pinloc[ofs][side][ipin])
02406                                 {
02407                                     continue;
02408                                 }
02409 
02410                             /* Figure out the chan seg at that side. 
02411                              * side is the side of the logic or io block. */
02412                             vert = ((side == TOP) || (side == BOTTOM));
02413                             pos_dir = ((side == TOP) || (side == RIGHT));
02414                             chan_type = (vert ? CHANX : CHANY);
02415                             chan = (vert ? (j + ofs) : i);
02416                             seg = (vert ? i : (j + ofs));
02417                             max_len = (vert ? nx : ny);
02418                             Fc_ofs = (vert ? Fc_xofs : Fc_yofs);
02419                             if(FALSE == pos_dir)
02420                                 {
02421                                     --chan;
02422                                 }
02423 
02424                             /* Skip the location if there is no channel. */
02425                             if(chan < 0)
02426                                 {
02427                                     continue;
02428                                 }
02429                             if(seg < 1)
02430                                 {
02431                                     continue;
02432                                 }
02433                             if(seg > (vert ? nx : ny))
02434                                 {
02435                                     continue;
02436                                 }
02437                             if(chan > (vert ? ny : nx))
02438                                 {
02439                                     continue;
02440                                 }
02441 
02442                             /* Get the list of opin to mux connections for that chan seg. */
02443                             num_edges +=
02444                                 get_unidir_opin_connections(chan, seg,
02445                                                             Fc_out[type->
02446                                                                    index],
02447                                                             chan_type,
02448                                                             seg_details,
02449                                                             &edge_list,
02450                                                             Fc_ofs,
02451                                                             rr_edge_done,
02452                                                             max_len,
02453                                                             nodes_per_chan,
02454                                                             rr_node_indices,
02455                                                             &clipped);
02456                             if(clipped)
02457                                 {
02458                                     *Fc_clipped = TRUE;
02459                                 }
02460                         }
02461                 }
02462 
02463             /* Add the edges */
02464             inode = get_rr_node_index(i, j, OPIN, ipin, rr_node_indices);
02465             alloc_and_load_edges_and_switches(rr_node, inode, num_edges,
02466                                               rr_edge_done, edge_list);
02467                 while(edge_list != NULL) {
02468                         next = edge_list->next;
02469                         free(edge_list);
02470                         edge_list = next;
02471                 }
02472         }
02473 }
02474 #if 0
02475 static void
02476 load_uniform_opin_switch_pattern_paired(INP int *Fc_out,
02477                                         INP int num_pins,
02478                                         INP int *pins_in_chan_seg,
02479                                         INP int num_wire_inc_muxes,
02480                                         INP int num_wire_dec_muxes,
02481                                         INP int *wire_inc_muxes,
02482                                         INP int *wire_dec_muxes,
02483                                         INOUTP t_rr_node * rr_node,
02484                                         INOUTP boolean * rr_edge_done,
02485                                         INP t_seg_details * seg_details,
02486                                         OUTP boolean * Fc_clipped)
02487 {
02488 
02489     /* Directionality is assumed to be uni-directional */
02490 
02491     /* Make turn-based assignment to avoid overlap when Fc_ouput is low. This is a bipartite
02492      * matching problem. Out of "num_wire_muxes" muxes "Fc_output" of them is assigned
02493      * to each outpin (total "num_pins" of them); assignment is uniform (spacing - spreadout) 
02494      * and staggered to avoid overlap when Fc_output is low. */
02495 
02496     /* The natural order wires muxes are stored in wire_muxes is alternating in directionality 
02497      * already (by my implementation), so no need to do anything extra to avoid directional bias */
02498 
02499     /* TODO: Due to spacing, it's possible to have directional bias: all Fc_out wires connected 
02500      * to one opin goes in either INC or DEC -> whereas I want a mix of both.
02501      * SOLUTION: Use quantization of 2 to ensure that if an opin connects to one wire, it 
02502      * must also connect to its companion wire, which runs in the opposite direction. This 
02503      * means instead of having num_wire_muxes as the matching set, pick out the INC wires
02504      * in num_wires_muxes as the matching set (the DEC wires are their companions)  April 17, 2007 
02505      * NEWS: That solution does not work, as treating wires in groups will lead to serious
02506      * abnormal patterns (conns crossing multiple blocks) for W nonquantized to multiples of 2L.
02507      * So, I'm chaning that approach to a new one that avoids directional bias: I will separate
02508      * the INC muxes and DEC muxes into two sets. Each set is uniformly assigned to opins with
02509      * Fc_output/2; this should be identical as before for normal cases and contains all conns
02510      * in the same chan segment for the nonquantized cases. */
02511 
02512     /* Finally, separated the two approaches: 1. Take all wire muxes and assign them to opins
02513      * one at a time (load_uniform_opin_switch_pattern) 2. Take pairs (by companion) 
02514      * of wire muxes and assign them to opins a pair at a time (load_uniform_opin_switch_pattern_paired). 
02515      * The first is used for fringe channel segments (ends of channels, where
02516      * there are lots of muxes due to partial wire segments) and the second is used in core */
02517 
02518     /* float spacing, step_size, f_mux; */
02519     int ipin, iconn, num_edges, init_mux;
02520     int from_node, to_node, to_track;
02521     int xlow, ylow;
02522     t_linked_edge *edge_list;
02523     int *wire_muxes;
02524     int k, num_wire_muxes, Fc_output_per_side, CurFc;
02525     int count_inc, count_dec;
02526     t_type_ptr type;
02527 
02528     *Fc_clipped = FALSE;
02529 
02530     count_inc = count_dec = 0;
02531 
02532     for(ipin = 0; ipin < num_pins; ipin++)
02533         {
02534             from_node = pins_in_chan_seg[ipin];
02535             xlow = rr_node[from_node].xlow;
02536             ylow = rr_node[from_node].ylow;
02537             type = grid[xlow][ylow].type;
02538             edge_list = NULL;
02539             num_edges = 0;
02540 
02541             /* Assigning the INC muxes first, then DEC muxes */
02542             for(k = 0; k < 2; ++k)
02543                 {
02544                     if(k == 0)
02545                         {
02546                             num_wire_muxes = num_wire_inc_muxes;
02547                             wire_muxes = wire_inc_muxes;
02548                         }
02549                     else
02550                         {
02551                             num_wire_muxes = num_wire_dec_muxes;
02552                             wire_muxes = wire_dec_muxes;
02553                         }
02554 
02555                     /* Half the Fc will be assigned for each direction. */
02556                     assert(Fc_out[type->index] % 2 == 0);
02557                     Fc_output_per_side = Fc_out[type->index] / 2;
02558 
02559                     /* Clip the demand. Make sure to use a new variable so
02560                      * on the second pass it is not clipped. */
02561                     CurFc = Fc_output_per_side;
02562                     if(Fc_output_per_side > num_wire_muxes)
02563                         {
02564                             *Fc_clipped = TRUE;
02565                             CurFc = num_wire_muxes;
02566                         }
02567 
02568                     if(k == 0)
02569                         {
02570                             init_mux = (count_inc) % num_wire_muxes;
02571                             count_inc += CurFc;
02572                         }
02573                     else
02574                         {
02575                             init_mux = (count_dec) % num_wire_muxes;
02576                             count_dec += CurFc;
02577                         }
02578 
02579                     for(iconn = 0; iconn < CurFc; iconn++)
02580                         {
02581                             /* FINALLY, make the outpin to mux connection */
02582                             /* Latest update: I'm not using Uniform Pattern, but a similarly staggered pattern */
02583                             to_node =
02584                                 wire_muxes[(init_mux +
02585                                             iconn) % num_wire_muxes];
02586 
02587                             rr_node[to_node].num_opin_drivers++;        /* keep track of mux size */
02588                             to_track = rr_node[to_node].ptc_num;
02589 
02590                             if(FALSE == rr_edge_done[to_node])
02591                                 {
02592                                     /* Use of alloc_and_load_edges_and_switches 
02593                                      * must be accompanied by rr_edge_done check. */
02594                                     rr_edge_done[to_node] = TRUE;
02595                                     edge_list =
02596                                         insert_in_edge_list(edge_list,
02597                                                             to_node,
02598                                                             seg_details
02599                                                             [to_track].
02600                                                             wire_switch);
02601                                     num_edges++;
02602                                 }
02603                         }
02604                 }
02605 
02606             if(num_edges < 1)
02607                 {
02608                     printf
02609                         ("Error:  opin %d at (%d,%d) does not connect to any "
02610                          "tracks.\n", rr_node[from_node].ptc_num,
02611                          rr_node[from_node].xlow, rr_node[from_node].ylow);
02612                     exit(1);
02613                 }
02614 
02615             alloc_and_load_edges_and_switches(rr_node, from_node, num_edges,
02616                                               rr_edge_done, edge_list);
02617         }
02618 }
02619 #endif
02620 
02621 #if MUX_SIZE_DIST_DISPLAY
02622 /** This routine prints and dumps statistics on the mux sizes on a sblock 
02623  * per sblock basis, over the entire chip. Mux sizes should be balanced (off by
02624  * at most 1) for all muxes in the same sblock in the core, and corner sblocks.
02625  * Fringe sblocks will have imbalance due to missing one side and constrains on 
02626  * where wires must connect. Comparing two core sblock sblocks, muxes need not 
02627  * be balanced if W is not quantized to 2L multiples, again for reasons that 
02628  * there could be sblocks with different number of muxes but same number of incoming
02629  * wires that need to make connections to these muxes (we don't want to under-connect
02630  * user-specified Fc and Fs). 
02631  */
02632 static void
02633 view_mux_size_distribution(t_ivec *** rr_node_indices,
02634                            int nodes_per_chan,
02635                            t_seg_details * seg_details_x,
02636                            t_seg_details * seg_details_y)
02637 {
02638 
02639     int i, j, itrack, seg_num, chan_num, max_len;
02640     int start, end, inode, max_value, min_value;
02641     int array_count, k, num_muxes;
02642     short direction, side;
02643     float *percent_range_array;
02644     float percent_range, percent_range_sum, avg_percent_range;
02645     float std_dev_percent_range, deviation_f;
02646     int range, *range_array, global_max_range;
02647     float avg_range, range_sum, std_dev_range;
02648     t_seg_details *seg_details;
02649     t_mux *new_mux, *sblock_mux_list_head, *current, *next;
02650 
02651 #ifdef ENABLE_DUMP
02652     FILE *dump_file_per_sblock, *dump_file;
02653 #endif /* ENABLE_DUMP */
02654     t_mux_size_distribution *distr_list, *distr_current, *new_distribution,
02655         *distr_next;
02656 
02657 #ifdef ENABLE_DUMP
02658     dump_file = my_fopen("mux_size_dump.txt", "w", 0);
02659     dump_file_per_sblock = my_fopen("mux_size_per_sblock_dump.txt", "w", 0);
02660 #endif /* ENABLE_DUMP */
02661 
02662     sblock_mux_list_head = NULL;
02663     percent_range_array =
02664         (float *)my_malloc((nx - 1) * (ny - 1) * sizeof(float));
02665     range_array = (int *)my_malloc((nx - 1) * (ny - 1) * sizeof(int));
02666     array_count = 0;
02667     percent_range_sum = 0.0;
02668     range_sum = 0.0;
02669     global_max_range = 0;
02670     min_value = 0;
02671     max_value = 0;
02672     seg_num = 0;
02673     chan_num = 0;
02674     direction = 0;
02675     seg_details = 0;
02676     max_len = 0;
02677     distr_list = NULL;
02678 
02679     /* With the specified range, I'm only looking at core sblocks */
02680     for(j = (ny - 1); j > 0; j--)
02681         {
02682             for(i = 1; i < nx; i++)
02683                 {
02684                     num_muxes = 0;
02685                     for(side = 0; side < 4; side++)
02686                         {
02687                             switch (side)
02688                                 {
02689                                 case LEFT:
02690                                     seg_num = i;
02691                                     chan_num = j;
02692                                     direction = DEC_DIRECTION;  /* only DEC have muxes in that sblock */
02693                                     seg_details = seg_details_x;
02694                                     max_len = nx;
02695                                     break;
02696 
02697                                 case RIGHT:
02698                                     seg_num = i + 1;
02699                                     chan_num = j;
02700                                     direction = INC_DIRECTION;
02701                                     seg_details = seg_details_x;
02702                                     max_len = nx;
02703                                     break;
02704 
02705                                 case TOP:
02706                                     seg_num = j + 1;
02707                                     chan_num = i;
02708                                     direction = INC_DIRECTION;
02709                                     seg_details = seg_details_y;
02710                                     max_len = ny;
02711                                     break;
02712 
02713                                 case BOTTOM:
02714                                     seg_num = j;
02715                                     chan_num = i;
02716                                     direction = DEC_DIRECTION;
02717                                     seg_details = seg_details_y;
02718                                     max_len = ny;
02719                                     break;
02720 
02721                                 default:
02722                                     assert(FALSE);
02723                                 }
02724 
02725                             assert(nodes_per_chan > 0);
02726                             for(itrack = 0; itrack < nodes_per_chan; itrack++)
02727                                 {
02728                                     start =
02729                                         get_seg_start(seg_details, itrack,
02730                                                       seg_num, chan_num);
02731                                     end =
02732                                         get_seg_end(seg_details, itrack,
02733                                                     start, chan_num, max_len);
02734 
02735                                     if((seg_details[itrack].direction ==
02736                                         direction) && (((start == seg_num)
02737                                                         && (direction ==
02738                                                             INC_DIRECTION))
02739                                                        || ((end == seg_num)
02740                                                            && (direction ==
02741                                                                DEC_DIRECTION))))
02742                                         {       /* mux found */
02743                                             num_muxes++;
02744                                             if(side == LEFT || side == RIGHT)
02745                                                 {       /* CHANX */
02746                                                     inode =
02747                                                         get_rr_node_index
02748                                                         (seg_num, chan_num,
02749                                                          CHANX, itrack,
02750                                                          rr_node_indices);
02751                                                 }
02752                                             else
02753                                                 {
02754                                                     assert((side == TOP) || (side == BOTTOM));  /* CHANY */
02755                                                     inode =
02756                                                         get_rr_node_index
02757                                                         (chan_num, seg_num,
02758                                                          CHANY, itrack,
02759                                                          rr_node_indices);
02760                                                 }
02761 
02762                                             new_mux = (t_mux *)
02763                                                 my_malloc(sizeof(t_mux));
02764                                             new_mux->size =
02765                                                 rr_node[inode].
02766                                                 num_wire_drivers +
02767                                                 rr_node[inode].
02768                                                 num_opin_drivers;
02769                                             new_mux->next = NULL;
02770 
02771                                             /* insert in linked list, descending */
02772                                             if(sblock_mux_list_head == NULL)
02773                                                 {
02774                                                     /* first entry */
02775                                                     sblock_mux_list_head =
02776                                                         new_mux;
02777                                                 }
02778                                             else if(sblock_mux_list_head->
02779                                                     size < new_mux->size)
02780                                                 {
02781                                                     /* insert before head */
02782                                                     new_mux->next =
02783                                                         sblock_mux_list_head;
02784                                                     sblock_mux_list_head =
02785                                                         new_mux;
02786                                                 }
02787                                             else
02788                                                 {
02789                                                     /* insert after head */
02790                                                     current =
02791                                                         sblock_mux_list_head;
02792                                                     next = current->next;
02793 
02794                                                     while((next != NULL)
02795                                                           && (next->size >
02796                                                               new_mux->size))
02797                                                         {
02798                                                             current = next;
02799                                                             next =
02800                                                                 current->next;
02801                                                         }
02802 
02803                                                     if(next == NULL)
02804                                                         {
02805                                                             current->next =
02806                                                                 new_mux;
02807                                                         }
02808                                                     else
02809                                                         {
02810                                                             new_mux->next =
02811                                                                 current->next;
02812                                                             current->next =
02813                                                                 new_mux;
02814                                                         }
02815                                                 }
02816                                             /* end of insert in linked list */
02817                                         }
02818                                 }
02819                         }       /* end of mux searching over all four sides of sblock */
02820                     /* now sblock_mux_list_head holds a linked list of all muxes in this sblock */
02821 
02822                     current = sblock_mux_list_head;
02823 
02824 #ifdef ENABLE_DUMP
02825                     fprintf(dump_file_per_sblock,
02826                             "sblock at (%d, %d) has mux sizes: {", i, j);
02827 #endif /* ENABLE_DUMP */
02828 
02829                     if(current != NULL)
02830                         {
02831                             max_value = min_value = current->size;
02832                         }
02833                     while(current != NULL)
02834                         {
02835                             if(max_value < current->size)
02836                                 max_value = current->size;
02837                             if(min_value > current->size)
02838                                 min_value = current->size;
02839 
02840 #ifdef ENABLE_DUMP
02841                             fprintf(dump_file_per_sblock, "%d ",
02842                                     current->size);
02843                             fprintf(dump_file, "%d\n", current->size);
02844 #endif /* ENABLE_DUMP */
02845 
02846                             current = current->next;
02847                         }
02848 
02849 #ifdef ENABLE_DUMP
02850                     fprintf(dump_file_per_sblock, "}\n\tmax: %d\tmin:%d",
02851                             max_value, min_value);
02852 #endif /* ENABLE_DUMP */
02853 
02854                     range = max_value - min_value;
02855                     percent_range = ((float)range) / ((float)min_value);
02856 
02857                     if(global_max_range < range)
02858                         global_max_range = range;
02859 
02860 #ifdef ENABLE_DUMP
02861                     fprintf(dump_file_per_sblock,
02862                             "\t\trange: %d\t\tpercent range:%.2f\n",
02863                             range, percent_range);
02864 #endif /* ENABLE_DUMP */
02865 
02866                     percent_range_array[array_count] = percent_range;
02867                     range_array[array_count] = range;
02868 
02869                     percent_range_sum += percent_range;
02870                     range_sum += range;
02871 
02872                     array_count++;
02873 
02874                     /* I will use a distribution for each (core) sblock type. 
02875                      * There are more than 1 type of sblocks, 
02876                      * when quantization of W to 2L multiples is not observed. */
02877 
02878 
02879 
02880                     distr_current = distr_list;
02881                     while(distr_current != NULL
02882                           && distr_current->mux_count != num_muxes)
02883                         {
02884                             distr_current = distr_current->next;
02885                         }
02886 
02887                     if(distr_current == NULL)
02888                         {
02889                             /* Create a distribution for the new sblock type, 
02890                              * and put it as head of linked list by convention */
02891 
02892                             new_distribution = (t_mux_size_distribution *)
02893                                 my_malloc(sizeof(t_mux_size_distribution));
02894                             new_distribution->mux_count = num_muxes;
02895                             new_distribution->max_index = max_value;
02896                             new_distribution->distr =
02897                                 (int *)my_calloc(max_value + 1, sizeof(int));
02898 
02899                             /* filling in the distribution */
02900                             current = sblock_mux_list_head;
02901                             while(current != NULL)
02902                                 {
02903                                     assert(current->size <=
02904                                            new_distribution->max_index);
02905                                     new_distribution->distr[current->size]++;
02906                                     current = current->next;
02907                                 }
02908 
02909                             /* add it to head */
02910                             new_distribution->next = distr_list;
02911                             distr_list = new_distribution;
02912                         }
02913                     else
02914                         {
02915                             /* distr_current->mux_count == num_muxes so add this sblock's mux sizes in this distribution */
02916                             current = sblock_mux_list_head;
02917 
02918                             while(current != NULL)
02919                                 {
02920                                     if(current->size >
02921                                        distr_current->max_index)
02922                                         {
02923                                             /* needs to realloc to expand the distribution array to hold the new large-valued data */
02924                                             distr_current->distr =
02925                                                 my_realloc(distr_current->
02926                                                            distr,
02927                                                            (current->size +
02928                                                             1) * sizeof(int));
02929 
02930                                             /* initializing the newly allocated elements */
02931                                             for(k =
02932                                                 (distr_current->max_index +
02933                                                  1); k <= current->size; k++)
02934                                                 distr_current->distr[k] = 0;
02935 
02936                                             distr_current->max_index =
02937                                                 current->size;
02938                                             distr_current->distr[current->
02939                                                                  size]++;
02940                                         }
02941                                     else
02942                                         {
02943                                             distr_current->distr[current->
02944                                                                  size]++;
02945                                         }
02946                                     current = current->next;
02947                                 }
02948                         }
02949 
02950                     /* done - now free memory */
02951                     current = sblock_mux_list_head;
02952                     while(current != NULL)
02953                         {
02954                             next = current->next;
02955                             free(current);
02956                             current = next;
02957                         }
02958                     sblock_mux_list_head = NULL;
02959                 }
02960         }
02961 
02962     avg_percent_range = (float)percent_range_sum / array_count;
02963     avg_range = (float)range_sum / array_count;
02964 
02965     percent_range_sum = 0.0;
02966     range_sum = 0.0;
02967     for(k = 0; k < array_count; k++)
02968         {
02969             deviation_f = (percent_range_array[k] - avg_percent_range);
02970             percent_range_sum += deviation_f * deviation_f;
02971 
02972             deviation_f = ((float)range_array[k] - avg_range);
02973             range_sum += deviation_f * deviation_f;
02974         }
02975     std_dev_percent_range =
02976         sqrt(percent_range_sum / ((float)array_count - 1.0));
02977     std_dev_range = sqrt(range_sum / ((float)array_count - 1.0));
02978     printf("==== MUX size statistics ====\n");
02979     printf("max range of mux size within a sblock is; %d\n",
02980            global_max_range);
02981     printf("average range of mux size within a sblock is; %.2f\n", avg_range);
02982     printf("std dev of range of mux size within a sblock is; %.2f\n",
02983            std_dev_range);
02984     printf
02985         ("average percent range of mux size within a sblock is; %.2f%%\n",
02986          avg_percent_range * 100.0);
02987     printf
02988         ("std dev of percent range of mux size within a sblock is; %.2f%%\n",
02989          std_dev_percent_range * 100.0);
02990 
02991     printf(" -- Detailed MUX size distribution by sblock type -- \n");
02992     distr_current = distr_list;
02993     while(distr_current != NULL)
02994         {
02995             print_distribution(stdout, distr_current);
02996 
02997             /* free */
02998             distr_next = distr_current->next;
02999 
03000             free(distr_current->distr);
03001             free(distr_current);
03002 
03003             distr_current = distr_next;
03004         }
03005 
03006     free(percent_range_array);
03007     free(range_array);
03008 #ifdef ENABLE_DUMP
03009     fclose(dump_file_per_sblock);
03010     fclose(dump_file);
03011 #endif /* ENABLE_DUMP */
03012 }
03013 
03014 static void
03015 print_distribution(FILE * fptr,
03016                    t_mux_size_distribution * distr_struct)
03017 {
03018     int *distr;
03019     int k;
03020     float sum;
03021     boolean zeros;
03022 
03023     distr = distr_struct->distr;
03024     fprintf(fptr,
03025             "For Sblocks containing %d MUXes, the MUX size distribution is:\n",
03026             distr_struct->mux_count);
03027     fprintf(fptr, "\t\t\tSize\t\t\tFrequency (percent)\n");
03028 
03029     sum = 0.0;
03030     for(k = 0; k <= distr_struct->max_index; k++)
03031         sum += distr[k];
03032 
03033     zeros = TRUE;
03034     for(k = 0; k <= distr_struct->max_index; k++)
03035         {
03036             if(zeros && (distr[k] == 0))
03037                 {
03038                     /* do nothing for leading string of zeros */
03039                 }
03040             else
03041                 {
03042                     zeros = FALSE;      /* leading string of zeros ended */
03043                     fprintf(fptr, "\t\t\t%d\t\t\t%d (%.2f%%)\n", k, distr[k],
03044                             (float)distr[k] / sum * 100.0);
03045                 }
03046         }
03047     fprintf(fptr, "\nEnd of this Sblock MUX size distribution.\n");
03048 
03049 }
03050 #endif