VPR-6.0
|
00001 #include <stdio.h> 00002 #include <math.h> 00003 #include <assert.h> 00004 #include <string.h> 00005 #include "util.h" 00006 #include "vpr_types.h" 00007 #include "globals.h" 00008 #include "rr_graph_util.h" 00009 #include "rr_graph.h" 00010 #include "rr_graph2.h" 00011 #include "rr_graph_sbox.h" 00012 #include "check_rr_graph.h" 00013 #include "rr_graph_timing_params.h" 00014 #include "rr_graph_indexed_data.h" 00015 #include "vpr_utils.h" 00016 #include "read_xml_arch_file.h" 00017 00018 /* #define ENABLE_DUMP */ 00019 /* #define MUX_SIZE_DIST_DISPLAY */ 00020 00021 /** mux size statistic data structures */ 00022 typedef struct s_mux 00023 { 00024 int size; 00025 struct s_mux *next; 00026 } 00027 t_mux; 00028 00029 typedef struct s_mux_size_distribution 00030 { 00031 int mux_count; 00032 int max_index; 00033 int *distr; 00034 struct s_mux_size_distribution *next; 00035 } 00036 t_mux_size_distribution; 00037 00038 /* UDSD Modifications by WMF End */ 00039 00040 00041 /******************* Variables local to this module. ***********************/ 00042 00043 static struct s_linked_vptr *rr_mem_chunk_list_head = NULL; 00044 00045 /** Used to free "chunked" memory. If NULL, no rr_graph exists right now. */ 00046 static int chunk_bytes_avail = 0; 00047 /** Status of current chunk being dished out by calls to my_chunk_malloc. */ 00048 static char *chunk_next_avail_mem = NULL; 00049 00050 /********************* Subroutines local to this module. 
*******************/ 00051 static int ****alloc_and_load_pin_to_track_map(INP enum e_pin_type pin_type, 00052 INP int nodes_per_chan, 00053 INP int Fc, 00054 INP t_type_ptr Type, 00055 INP boolean 00056 perturb_switch_pattern, 00057 INP enum e_directionality 00058 directionality); 00059 00060 static struct s_ivec ***alloc_and_load_track_to_pin_lookup(INP int 00061 ****pin_to_track_map, 00062 INP int Fc, 00063 INP int height, 00064 INP int num_pins, 00065 INP int 00066 nodes_per_chan); 00067 00068 static void build_bidir_rr_opins(INP int i, 00069 INP int j, 00070 INOUTP t_rr_node * rr_node, 00071 INP t_ivec *** rr_node_indices, 00072 INP int *****opin_to_track_map, 00073 INP int *Fc_out, 00074 INP boolean * rr_edge_done, 00075 INP t_seg_details * seg_details, 00076 INP struct s_grid_tile **grid); 00077 00078 static void build_unidir_rr_opins(INP int i, 00079 INP int j, 00080 INP struct s_grid_tile **grid, 00081 INP int *Fc_out, 00082 INP int nodes_per_chan, 00083 INP t_seg_details * seg_details, 00084 INOUTP int **Fc_xofs, 00085 INOUTP int **Fc_yofs, 00086 INOUTP t_rr_node * rr_node, 00087 INOUTP boolean * rr_edge_done, 00088 OUTP boolean * Fc_clipped, 00089 INP t_ivec *** rr_node_indices); 00090 00091 static void alloc_and_load_rr_graph(INP int num_nodes, 00092 INP t_rr_node * rr_node, 00093 INP int num_seg_types, 00094 INP t_seg_details * seg_details, 00095 INP boolean * rr_edge_done, 00096 INP struct s_ivec ****track_to_ipin_lookup, 00097 INP int *****opin_to_track_map, 00098 INP struct s_ivec ***switch_block_conn, 00099 INP struct s_grid_tile **grid, 00100 INP int nx, 00101 INP int ny, 00102 INP int Fs, 00103 INP short *****sblock_pattern, 00104 INP int *Fc_out, 00105 INP int **Fc_xofs, 00106 INP int **Fc_yofs, 00107 INP t_ivec *** rr_node_indices, 00108 INP int nodes_per_chan, 00109 INP enum e_switch_block_type sb_type, 00110 INP int delayless_switch, 00111 INP enum e_directionality directionality, 00112 INP int wire_to_ipin_switch, 00113 OUTP boolean * 
Fc_clipped); 00114 00115 static void load_uniform_switch_pattern(INP t_type_ptr type, 00116 INOUTP int ****tracks_connected_to_pin, 00117 INP int num_phys_pins, 00118 INP int *pin_num_ordering, 00119 INP int *side_ordering, 00120 INP int *offset_ordering, 00121 INP int nodes_per_chan, 00122 INP int Fc, 00123 INP enum e_directionality 00124 directionality); 00125 00126 static void load_perturbed_switch_pattern(INP t_type_ptr type, 00127 INOUTP int 00128 ****tracks_connected_to_pin, 00129 INP int num_phys_pins, 00130 INP int *pin_num_ordering, 00131 INP int *side_ordering, 00132 INP int *offset_ordering, 00133 INP int nodes_per_chan, 00134 INP int Fc, 00135 INP enum e_directionality 00136 directionality); 00137 00138 static void check_all_tracks_reach_pins(t_type_ptr type, 00139 int ****tracks_connected_to_pin, 00140 int nodes_per_chan, 00141 int Fc, 00142 enum e_pin_type ipin_or_opin); 00143 00144 static boolean *alloc_and_load_perturb_ipins(INP int nodes_per_chan, 00145 INP int num_types, 00146 INP int *Fc_in, 00147 INP int *Fc_out, 00148 INP enum e_directionality 00149 directionality); 00150 00151 00152 static void build_rr_sinks_sources(INP int i, 00153 INP int j, 00154 INP t_rr_node * rr_node, 00155 INP t_ivec *** rr_node_indices, 00156 INP int delayless_switch, 00157 INP struct s_grid_tile **grid); 00158 00159 static void build_rr_xchan(INP int i, 00160 INP int j, 00161 INP struct s_ivec ****track_to_ipin_lookup, 00162 INP struct s_ivec ***switch_block_conn, 00163 INP int cost_index_offset, 00164 INP int nodes_per_chan, 00165 INP int *opin_mux_size, 00166 INP short *****sblock_pattern, 00167 INP int Fs_per_side, 00168 INP t_seg_details * seg_details, 00169 INP t_ivec *** rr_node_indices, 00170 INP boolean * rr_edge_done, 00171 INOUTP t_rr_node * rr_node, 00172 INP int wire_to_ipin_switch, 00173 INP enum e_directionality directionality); 00174 00175 static void build_rr_ychan(INP int i, 00176 INP int j, 00177 INP struct s_ivec ****track_to_ipin_lookup, 00178 INP 
struct s_ivec ***switch_block_conn, 00179 INP int cost_index_offset, 00180 INP int nodes_per_chan, 00181 INP int *opin_mux_size, 00182 INP short *****sblock_pattern, 00183 INP int Fs_per_side, 00184 INP t_seg_details * seg_details, 00185 INP t_ivec *** rr_node_indices, 00186 INP boolean * rr_edge_done, 00187 INOUTP t_rr_node * rr_node, 00188 INP int wire_to_ipin_switch, 00189 INP enum e_directionality directionality); 00190 00191 00192 static void rr_graph_externals( 00193 t_timing_inf timing_inf, 00194 t_segment_inf * segment_inf, 00195 int num_seg_types, 00196 int nodes_per_chan, 00197 int wire_to_ipin_switch, 00198 enum e_base_cost_type base_cost_type); 00199 00200 void alloc_and_load_edges_and_switches(INP t_rr_node * rr_node, 00201 INP int inode, 00202 INP int num_edges, 00203 INP boolean * rr_edge_done, 00204 INP t_linked_edge * edge_list_head); 00205 00206 static void alloc_net_rr_terminals(void); 00207 00208 static void alloc_and_load_rr_clb_source(t_ivec *** rr_node_indices); 00209 00210 #if 0 00211 static void load_uniform_opin_switch_pattern_paired(INP int *Fc_out, 00212 INP int num_pins, 00213 INP int *pins_in_chan_seg, 00214 INP int num_wire_inc_muxes, 00215 INP int num_wire_dec_muxes, 00216 INP int *wire_inc_muxes, 00217 INP int *wire_dec_muxes, 00218 INOUTP t_rr_node * rr_node, 00219 INOUTP boolean * 00220 rr_edge_done, 00221 INP t_seg_details * 00222 seg_details, 00223 OUTP boolean * Fc_clipped); 00224 #endif 00225 void watch_edges(int inode, 00226 t_linked_edge * edge_list_head); 00227 #if MUX_SIZE_DIST_DISPLAY 00228 static void view_mux_size_distribution(t_ivec *** rr_node_indices, 00229 int nodes_per_chan, 00230 t_seg_details * seg_details_x, 00231 t_seg_details * seg_details_y); 00232 static void print_distribution(FILE * fptr, 00233 t_mux_size_distribution * distr_struct); 00234 #endif 00235 00236 static void free_type_pin_to_track_map(int***** ipin_to_track_map, t_type_ptr types, int* fc_in); 00237 00238 static void 
free_type_track_to_ipin_map(struct s_ivec**** track_to_pin_map, t_type_ptr types, int nodes_per_chan); 00239 00240 static t_seg_details *alloc_and_load_global_route_seg_details(INP int 00241 nodes_per_chan, 00242 INP int 00243 global_route_switch); 00244 00245 /* UDSD Modifications by WMF End */ 00246 00247 static int *alloc_and_load_actual_fc(INP int num_types, 00248 INP t_type_ptr types, 00249 INP int nodes_per_chan, 00250 INP boolean is_Fc_out, 00251 INP enum e_directionality directionality, 00252 OUTP boolean * Fc_clipped); 00253 00254 /******************* Subroutine definitions *******************************/ 00255 00256 00257 void 00258 build_rr_graph(INP t_graph_type graph_type, 00259 INP int num_types, 00260 INP t_type_ptr types, 00261 INP int nx, 00262 INP int ny, 00263 INP struct s_grid_tile **grid, 00264 INP int chan_width, 00265 INP struct s_chan_width_dist *chan_capacity_inf, 00266 INP enum e_switch_block_type sb_type, 00267 INP int Fs, 00268 INP int num_seg_types, 00269 INP int num_switches, 00270 INP t_segment_inf * segment_inf, 00271 INP int global_route_switch, 00272 INP int delayless_switch, 00273 INP t_timing_inf timing_inf, 00274 INP int wire_to_ipin_switch, 00275 INP enum e_base_cost_type base_cost_type, 00276 OUTP int *Warnings) 00277 { 00278 /* Temp structures used to build graph */ 00279 int nodes_per_chan, i, j; 00280 t_seg_details *seg_details = NULL; 00281 int *Fc_in = NULL; 00282 int *Fc_out = NULL; 00283 00284 int *****opin_to_track_map = NULL; /* [0..num_types-1][0..num_pins-1][0..height][0..3][0..Fc-1] */ 00285 int *****ipin_to_track_map = NULL; /* [0..num_types-1][0..num_pins-1][0..height][0..3][0..Fc-1] */ 00286 t_ivec ****track_to_ipin_lookup = NULL; /* [0..num_types-1][0..nodes_per_chan-1][0..height][0..3] */ 00287 t_ivec ***switch_block_conn = NULL; 00288 short *****unidir_sb_pattern = NULL; 00289 boolean *rr_edge_done = NULL; 00290 boolean is_global_graph; 00291 boolean Fc_clipped; 00292 boolean use_full_seg_groups; 00293 
boolean *perturb_ipins = NULL; 00294 enum e_directionality directionality; 00295 int **Fc_xofs = NULL; /* [0..ny-1][0..nx-1] */ 00296 int **Fc_yofs = NULL; /* [0..nx-1][0..ny-1] */ 00297 00298 rr_node_indices = NULL; 00299 rr_node = NULL; 00300 num_rr_nodes = 0; 00301 00302 /* Reset warning flag */ 00303 *Warnings = RR_GRAPH_NO_WARN; 00304 00305 /* Decode the graph_type */ 00306 is_global_graph = FALSE; 00307 if(GRAPH_GLOBAL == graph_type) 00308 { 00309 is_global_graph = TRUE; 00310 } 00311 use_full_seg_groups = FALSE; 00312 if(GRAPH_UNIDIR_TILEABLE == graph_type) 00313 { 00314 use_full_seg_groups = TRUE; 00315 } 00316 directionality = UNI_DIRECTIONAL; 00317 if(GRAPH_BIDIR == graph_type) 00318 { 00319 directionality = BI_DIRECTIONAL; 00320 } 00321 if(is_global_graph) 00322 { 00323 directionality = BI_DIRECTIONAL; 00324 } 00325 00326 00327 /* Global routing uses a single longwire track */ 00328 nodes_per_chan = (is_global_graph ? 1 : chan_width); 00329 assert(nodes_per_chan > 0); 00330 00331 00332 00333 /* START SEG_DETAILS */ 00334 if(is_global_graph) 00335 { 00336 /* Sets up a single unit length segment type for global routing. */ 00337 seg_details = 00338 alloc_and_load_global_route_seg_details(nodes_per_chan, 00339 global_route_switch); 00340 } 00341 else 00342 { 00343 /* Setup segments including distrubuting tracks and staggering. 00344 * If use_full_seg_groups is specified, nodes_per_chan may be 00345 * changed. Warning should be singled to caller if this happens. */ 00346 seg_details = alloc_and_load_seg_details(&nodes_per_chan, 00347 max(nx, ny), 00348 num_seg_types, 00349 segment_inf, 00350 use_full_seg_groups, 00351 is_global_graph, 00352 directionality); 00353 if((is_global_graph ? 
1 : chan_width) != nodes_per_chan) 00354 { 00355 *Warnings |= RR_GRAPH_WARN_CHAN_WIDTH_CHANGED; 00356 } 00357 #ifdef CREATE_ECHO_FILES 00358 dump_seg_details(seg_details, nodes_per_chan, "seg_details.txt"); 00359 #endif /* CREATE_ECHO_FILES */ 00360 } 00361 /* END SEG_DETAILS */ 00362 00363 00364 00365 /* START FC */ 00366 /* Determine the actual value of Fc */ 00367 if(is_global_graph) 00368 { 00369 Fc_in = (int *)my_malloc(sizeof(int) * num_types); 00370 Fc_out = (int *)my_malloc(sizeof(int) * num_types); 00371 for(i = 0; i < num_types; ++i) 00372 { 00373 Fc_in[i] = 1; 00374 Fc_out[i] = 1; 00375 } 00376 } 00377 else 00378 { 00379 Fc_clipped = FALSE; 00380 Fc_in = 00381 alloc_and_load_actual_fc(num_types, types, nodes_per_chan, 00382 FALSE, directionality, &Fc_clipped); 00383 if(Fc_clipped) 00384 { 00385 *Warnings |= RR_GRAPH_WARN_FC_CLIPPED; 00386 } 00387 Fc_clipped = FALSE; 00388 Fc_out = 00389 alloc_and_load_actual_fc(num_types, types, nodes_per_chan, 00390 TRUE, directionality, &Fc_clipped); 00391 if(Fc_clipped) 00392 { 00393 *Warnings |= RR_GRAPH_WARN_FC_CLIPPED; 00394 } 00395 00396 00397 #ifdef VERBOSE 00398 for(i = 1; i < num_types; ++i) 00399 { /* Skip "<EMPTY>" */ 00400 if(type_descriptors[i].is_Fc_out_full_flex) 00401 { 00402 printf 00403 ("Fc Actual Values: Type = %s, Fc_out = full, Fc_in = %d.\n", 00404 type_descriptors[i].name, Fc_input[i]); 00405 } 00406 else 00407 { 00408 printf 00409 ("Fc Actual Values: Type = %s, Fc_out = %d, Fc_in = %d.\n", 00410 type_descriptors[i].name, Fc_output[i], 00411 Fc_input[i]); 00412 } 00413 } 00414 #endif /* VERBOSE */ 00415 } 00416 00417 perturb_ipins = 00418 alloc_and_load_perturb_ipins(nodes_per_chan, num_types, Fc_in, 00419 Fc_out, directionality); 00420 /* END FC */ 00421 00422 00423 /* Alloc node lookups, count nodes, alloc rr nodes */ 00424 num_rr_nodes = 0; 00425 rr_node_indices = 00426 alloc_and_load_rr_node_indices(nodes_per_chan, nx, ny, &num_rr_nodes, 00427 seg_details); 00428 rr_node = (t_rr_node *) 
my_malloc(sizeof(t_rr_node) * num_rr_nodes); 00429 memset(rr_node, 0, sizeof(t_rr_node) * num_rr_nodes); 00430 rr_edge_done = (boolean *) my_malloc(sizeof(boolean) * num_rr_nodes); 00431 memset(rr_edge_done, 0, sizeof(boolean) * num_rr_nodes); 00432 00433 /* These are data structures used by the the unidir opin mapping. */ 00434 if(UNI_DIRECTIONAL == directionality) 00435 { 00436 Fc_xofs = (int **)alloc_matrix(0, ny, 0, nx, sizeof(int)); 00437 Fc_yofs = (int **)alloc_matrix(0, nx, 0, ny, sizeof(int)); 00438 for(i = 0; i <= nx; ++i) 00439 { 00440 for(j = 0; j <= ny; ++j) 00441 { 00442 Fc_xofs[j][i] = 0; 00443 Fc_yofs[i][j] = 0; 00444 } 00445 } 00446 } 00447 00448 00449 00450 /* START SB LOOKUP */ 00451 /* Alloc and load the switch block lookup */ 00452 if(is_global_graph) 00453 { 00454 assert(nodes_per_chan == 1); 00455 switch_block_conn = 00456 alloc_and_load_switch_block_conn(1, SUBSET, 3); 00457 } 00458 else if(BI_DIRECTIONAL == directionality) 00459 { 00460 switch_block_conn = 00461 alloc_and_load_switch_block_conn(nodes_per_chan, sb_type, Fs); 00462 } 00463 else 00464 { 00465 assert(UNI_DIRECTIONAL == directionality); 00466 00467 unidir_sb_pattern = 00468 alloc_sblock_pattern_lookup(nx, ny, nodes_per_chan); 00469 for(i = 0; i <= nx; i++) 00470 { 00471 for(j = 0; j <= ny; j++) 00472 { 00473 load_sblock_pattern_lookup(i, j, nodes_per_chan, 00474 seg_details, Fs, 00475 sb_type, 00476 unidir_sb_pattern); 00477 } 00478 } 00479 } 00480 /* END SB LOOKUP */ 00481 00482 00483 00484 /* START IPINP MAP */ 00485 /* Create ipin map lookups */ 00486 ipin_to_track_map = (int *****)my_malloc(sizeof(int ****) * num_types); 00487 track_to_ipin_lookup = 00488 (struct s_ivec ****)my_malloc(sizeof(struct s_ivec ***) * num_types); 00489 for(i = 0; i < num_types; ++i) 00490 { 00491 ipin_to_track_map[i] = alloc_and_load_pin_to_track_map(RECEIVER, 00492 nodes_per_chan, 00493 Fc_in[i], 00494 &types[i], 00495 perturb_ipins 00496 [i], 00497 directionality); 00498 track_to_ipin_lookup[i] = 
00499 alloc_and_load_track_to_pin_lookup(ipin_to_track_map[i], 00500 Fc_in[i], types[i].height, 00501 types[i].num_pins, 00502 nodes_per_chan); 00503 } 00504 /* END IPINP MAP */ 00505 00506 00507 00508 /* START OPINP MAP */ 00509 /* Create opin map lookups */ 00510 if(BI_DIRECTIONAL == directionality) 00511 { 00512 opin_to_track_map = 00513 (int *****)my_malloc(sizeof(int ****) * num_types); 00514 for(i = 0; i < num_types; ++i) 00515 { 00516 opin_to_track_map[i] = 00517 alloc_and_load_pin_to_track_map(DRIVER, 00518 nodes_per_chan, 00519 Fc_out[i], &types[i], 00520 FALSE, 00521 directionality); 00522 } 00523 } 00524 /* END OPINP MAP */ 00525 00526 /* UDSD Modifications by WMF begin */ 00527 /* I'm adding 2 new fields to t_rr_node, and I want them initialized to 0. */ 00528 for(i = 0; i < num_rr_nodes; i++) 00529 { 00530 rr_node[i].num_wire_drivers = 0; 00531 rr_node[i].num_opin_drivers = 0; 00532 } 00533 00534 00535 alloc_and_load_rr_graph(num_rr_nodes, rr_node, num_seg_types, 00536 seg_details, rr_edge_done, 00537 track_to_ipin_lookup, opin_to_track_map, 00538 switch_block_conn, grid, nx, 00539 ny, Fs, unidir_sb_pattern, Fc_out, Fc_xofs, 00540 Fc_yofs, rr_node_indices, nodes_per_chan, 00541 sb_type, delayless_switch, directionality, 00542 wire_to_ipin_switch, &Fc_clipped); 00543 00544 00545 #ifdef MUX_SIZE_DIST_DISPLAY 00546 if(UNI_DIRECTIONAL == directionality) 00547 { 00548 view_mux_size_distribution(rr_node_indices, nodes_per_chan, 00549 seg_details, seg_details); 00550 } 00551 #endif 00552 00553 /* Update rr_nodes capacities if global routing */ 00554 if(graph_type == GLOBAL) { 00555 for(i = 0; i < num_rr_nodes; i++) { 00556 if(rr_node[i].type == CHANX || rr_node[i].type == CHANY) { 00557 rr_node[i].capacity = chan_width; 00558 } 00559 } 00560 } 00561 00562 rr_graph_externals(timing_inf, segment_inf, num_seg_types, 00563 nodes_per_chan, wire_to_ipin_switch, base_cost_type); 00564 #ifdef CREATE_ECHO_FILES 00565 dump_rr_graph("rr_graph.echo"); 00566 #endif /* 
CREATE_ECHO_FILES */ 00567 00568 check_rr_graph(graph_type, num_types, types, nx, ny, 00569 grid, nodes_per_chan, Fs, num_seg_types, num_switches, segment_inf, 00570 global_route_switch, delayless_switch, 00571 wire_to_ipin_switch, seg_details, Fc_in, Fc_out, 00572 rr_node_indices, opin_to_track_map, ipin_to_track_map, 00573 track_to_ipin_lookup, switch_block_conn, perturb_ipins); 00574 00575 /* Free all temp structs */ 00576 if(seg_details) 00577 { 00578 free_seg_details(seg_details, nodes_per_chan); 00579 seg_details = NULL; 00580 } 00581 if(Fc_in) 00582 { 00583 free(Fc_in); 00584 Fc_in = NULL; 00585 } 00586 if(Fc_out) 00587 { 00588 free(Fc_out); 00589 Fc_out = NULL; 00590 } 00591 if(perturb_ipins) 00592 { 00593 free(perturb_ipins); 00594 perturb_ipins = NULL; 00595 } 00596 if(switch_block_conn) 00597 { 00598 free_switch_block_conn(switch_block_conn, nodes_per_chan); 00599 switch_block_conn = NULL; 00600 } 00601 if(rr_edge_done) 00602 { 00603 free(rr_edge_done); 00604 rr_edge_done = NULL; 00605 } 00606 if(Fc_xofs) 00607 { 00608 free_matrix(Fc_xofs, 0, ny, 0, sizeof(int)); 00609 Fc_xofs = NULL; 00610 } 00611 if(Fc_yofs) 00612 { 00613 free_matrix(Fc_yofs, 0, nx, 0, sizeof(int)); 00614 Fc_yofs = NULL; 00615 } 00616 if(unidir_sb_pattern) 00617 { 00618 free_sblock_pattern_lookup(unidir_sb_pattern); 00619 unidir_sb_pattern = NULL; 00620 } 00621 if(opin_to_track_map) { 00622 for(i = 0; i < num_types; ++i) 00623 { 00624 free_matrix4(opin_to_track_map[i], 0, types[i].num_pins - 1, 00625 0, types[i].height - 1, 0, 3, 0, sizeof(int)); 00626 } 00627 free(opin_to_track_map); 00628 } 00629 00630 free_type_pin_to_track_map(ipin_to_track_map, types, Fc_in); 00631 free_type_track_to_ipin_map(track_to_ipin_lookup, types, nodes_per_chan); 00632 } 00633 00634 00635 static void 00636 rr_graph_externals( t_timing_inf timing_inf, 00637 t_segment_inf * segment_inf, 00638 int num_seg_types, 00639 int nodes_per_chan, 00640 int wire_to_ipin_switch, 00641 enum e_base_cost_type 
base_cost_type) 00642 { 00643 add_rr_graph_C_from_switches(timing_inf.C_ipin_cblock); 00644 alloc_and_load_rr_indexed_data(segment_inf, num_seg_types, 00645 rr_node_indices, nodes_per_chan, 00646 wire_to_ipin_switch, base_cost_type); 00647 00648 alloc_net_rr_terminals(); 00649 load_net_rr_terminals(rr_node_indices); 00650 alloc_and_load_rr_clb_source(rr_node_indices); 00651 } 00652 00653 00654 static boolean * 00655 alloc_and_load_perturb_ipins(INP int nodes_per_chan, 00656 INP int num_types, 00657 INP int *Fc_in, 00658 INP int *Fc_out, 00659 INP enum e_directionality directionality) 00660 { 00661 int i; 00662 float Fc_ratio; 00663 boolean *result = NULL; 00664 00665 result = (boolean *) my_malloc(num_types * sizeof(boolean)); 00666 00667 if(BI_DIRECTIONAL == directionality) 00668 { 00669 for(i = 0; i < num_types; ++i) 00670 { 00671 result[i] = FALSE; 00672 00673 if(Fc_in[i] > Fc_out[i]) 00674 { 00675 Fc_ratio = (float)Fc_in[i] / (float)Fc_out[i]; 00676 } 00677 else 00678 { 00679 Fc_ratio = (float)Fc_out[i] / (float)Fc_in[i]; 00680 } 00681 00682 if((Fc_in[i] <= nodes_per_chan - 2) && 00683 (fabs(Fc_ratio - nint(Fc_ratio)) < 00684 (0.5 / (float)nodes_per_chan))) 00685 { 00686 result[i] = TRUE; 00687 } 00688 } 00689 } 00690 else 00691 { 00692 /* Unidirectional routing uses mux balancing patterns and 00693 * thus shouldn't need perturbation. 
*/ 00694 assert(UNI_DIRECTIONAL == directionality); 00695 for(i = 0; i < num_types; ++i) 00696 { 00697 result[i] = FALSE; 00698 } 00699 } 00700 00701 return result; 00702 } 00703 00704 00705 static t_seg_details * 00706 alloc_and_load_global_route_seg_details(INP int nodes_per_chan, 00707 INP int global_route_switch) 00708 { 00709 t_seg_details *result = NULL; 00710 00711 assert(nodes_per_chan == 1); 00712 result = (t_seg_details *) my_malloc(sizeof(t_seg_details)); 00713 00714 result->index = 0; 00715 result->length = 1; 00716 result->wire_switch = global_route_switch; 00717 result->opin_switch = global_route_switch; 00718 result->longline = FALSE; 00719 result->direction = BI_DIRECTION; 00720 result->Cmetal = 0.0; 00721 result->Rmetal = 0.0; 00722 result->start = 1; 00723 result->drivers = MULTI_BUFFERED; 00724 result->cb = (boolean *) my_malloc(sizeof(boolean) * 1); 00725 result->cb[0] = TRUE; 00726 result->sb = (boolean *) my_malloc(sizeof(boolean) * 2); 00727 result->sb[0] = TRUE; 00728 result->sb[1] = TRUE; 00729 result->group_size = 1; 00730 result->group_start = 0; 00731 00732 return result; 00733 } 00734 00735 00736 00737 00738 /** Calculates the actual Fc values for the given nodes_per_chan value */ 00739 static int * 00740 alloc_and_load_actual_fc(INP int num_types, 00741 INP t_type_ptr types, 00742 INP int nodes_per_chan, 00743 INP boolean is_Fc_out, 00744 INP enum e_directionality directionality, 00745 OUTP boolean * Fc_clipped) 00746 { 00747 00748 int i; 00749 int *Result = NULL; 00750 int fac, num_sets; 00751 float Fc; 00752 00753 *Fc_clipped = FALSE; 00754 00755 /* Unidir tracks formed in pairs, otherwise no effect. */ 00756 fac = 1; 00757 if(UNI_DIRECTIONAL == directionality) 00758 { 00759 fac = 2; 00760 } 00761 00762 assert((nodes_per_chan % fac) == 0); 00763 num_sets = nodes_per_chan / fac; 00764 00765 Result = (int *)my_malloc(sizeof(int) * num_types); 00766 00767 for(i = 0; i < num_types; ++i) 00768 { 00769 Fc = (is_Fc_out ? 
type_descriptors[i]. 00770 Fc_out : type_descriptors[i].Fc_in); 00771 00772 if(type_descriptors[i].is_Fc_frac) 00773 { 00774 Result[i] = fac * nint(num_sets * Fc); 00775 } 00776 else 00777 { 00778 Result[i] = Fc; 00779 } 00780 00781 if(is_Fc_out && type_descriptors[i].is_Fc_out_full_flex) 00782 { 00783 Result[i] = nodes_per_chan; 00784 } 00785 00786 Result[i] = max(Result[i], fac); 00787 if(Result[i] > nodes_per_chan) 00788 { 00789 *Fc_clipped = TRUE; 00790 Result[i] = nodes_per_chan; 00791 } 00792 00793 assert(Result[i] % fac == 0); 00794 } 00795 00796 return Result; 00797 } 00798 00799 /** frees the track to ipin mapping for each physical grid type */ 00800 static void 00801 free_type_track_to_ipin_map(struct s_ivec**** track_to_pin_map, t_type_ptr types, int nodes_per_chan) 00802 { 00803 int i, itrack, ioff, iside; 00804 for(i = 0; i < num_types; i++) { 00805 if(track_to_pin_map[i] != NULL) { 00806 for(itrack = 0; itrack < nodes_per_chan; itrack++) 00807 { 00808 for(ioff = 0; ioff < types[i].height; ioff++) 00809 { 00810 for(iside = 0; iside < 4; iside++) 00811 { 00812 if(track_to_pin_map[i][itrack][ioff][iside].list != NULL) { 00813 free(track_to_pin_map[i][itrack][ioff][iside].list); 00814 } 00815 } 00816 } 00817 } 00818 free_matrix3(track_to_pin_map[i], 0, nodes_per_chan - 1, 0, 00819 types[i].height - 1, 0, 00820 sizeof(struct s_ivec)); 00821 } 00822 } 00823 free(track_to_pin_map); 00824 } 00825 00826 /** frees the ipin to track mapping for each physical grid type */ 00827 static void 00828 free_type_pin_to_track_map(int***** ipin_to_track_map, t_type_ptr types, int* fc_in) 00829 { 00830 int i; 00831 for(i = 0; i < num_types; i++) { 00832 free_matrix4(ipin_to_track_map[i], 0, types[i].num_pins - 1, 00833 0, types[i].height - 1, 0, 3, 0, sizeof(int)); 00834 } 00835 free(ipin_to_track_map); 00836 } 00837 00838 /** Does the actual work of allocating the rr_graph and filling all the 00839 * appropriate values. Everything up to this was just a prelude! 
00840 */ 00841 static void 00842 alloc_and_load_rr_graph(INP int num_nodes, 00843 INP t_rr_node * rr_node, 00844 INP int num_seg_types, 00845 INP t_seg_details * seg_details, 00846 INP boolean * rr_edge_done, 00847 INP struct s_ivec ****track_to_ipin_lookup, 00848 INP int *****opin_to_track_map, 00849 INP struct s_ivec ***switch_block_conn, 00850 INP struct s_grid_tile **grid, 00851 INP int nx, 00852 INP int ny, 00853 INP int Fs, 00854 INP short *****sblock_pattern, 00855 INP int *Fc_out, 00856 INP int **Fc_xofs, 00857 INP int **Fc_yofs, 00858 INP t_ivec *** rr_node_indices, 00859 INP int nodes_per_chan, 00860 INP enum e_switch_block_type sb_type, 00861 INP int delayless_switch, 00862 INP enum e_directionality directionality, 00863 INP int wire_to_ipin_switch, 00864 OUTP boolean * Fc_clipped) 00865 { 00866 00867 int i, j; 00868 boolean clipped; 00869 int *opin_mux_size = NULL; 00870 00871 00872 /* If Fc gets clipped, this will be flagged to true */ 00873 *Fc_clipped = FALSE; 00874 00875 /* Connection SINKS and SOURCES to their pins. 
*/ 00876 for(i = 0; i <= (nx + 1); i++) 00877 { 00878 for(j = 0; j <= (ny + 1); j++) 00879 { 00880 build_rr_sinks_sources(i, j, rr_node, rr_node_indices, 00881 delayless_switch, grid); 00882 } 00883 } 00884 00885 /* Build opins */ 00886 for(i = 0; i <= (nx + 1); ++i) 00887 { 00888 for(j = 0; j <= (ny + 1); ++j) 00889 { 00890 if(BI_DIRECTIONAL == directionality) 00891 { 00892 build_bidir_rr_opins(i, j, rr_node, 00893 rr_node_indices, 00894 opin_to_track_map, Fc_out, 00895 rr_edge_done, seg_details, 00896 grid); 00897 } 00898 else 00899 { 00900 assert(UNI_DIRECTIONAL == directionality); 00901 build_unidir_rr_opins(i, j, grid, Fc_out, 00902 nodes_per_chan, seg_details, 00903 Fc_xofs, Fc_yofs, rr_node, 00904 rr_edge_done, &clipped, 00905 rr_node_indices); 00906 if(clipped) 00907 { 00908 *Fc_clipped = TRUE; 00909 } 00910 } 00911 } 00912 } 00913 00914 /* We make a copy of the current fanin values for the nodes to 00915 * know the number of OPINs driving each mux presently */ 00916 opin_mux_size = (int *)my_malloc(sizeof(int) * num_nodes); 00917 for(i = 0; i < num_nodes; ++i) 00918 { 00919 opin_mux_size[i] = rr_node[i].fan_in; 00920 } 00921 00922 /* Build channels */ 00923 assert(Fs % 3 == 0); 00924 for(i = 0; i <= nx; i++) 00925 { 00926 for(j = 0; j <= ny; j++) 00927 { 00928 if(i > 0) 00929 { 00930 build_rr_xchan(i, j, track_to_ipin_lookup, 00931 switch_block_conn, 00932 CHANX_COST_INDEX_START, 00933 nodes_per_chan, opin_mux_size, 00934 sblock_pattern, Fs / 3, 00935 seg_details, rr_node_indices, 00936 rr_edge_done, rr_node, 00937 wire_to_ipin_switch, 00938 directionality); 00939 } 00940 if(j > 0) 00941 { 00942 build_rr_ychan(i, j, track_to_ipin_lookup, 00943 switch_block_conn, 00944 CHANX_COST_INDEX_START + 00945 num_seg_types, nodes_per_chan, 00946 opin_mux_size, sblock_pattern, 00947 Fs / 3, seg_details, 00948 rr_node_indices, rr_edge_done, 00949 rr_node, wire_to_ipin_switch, 00950 directionality); 00951 } 00952 } 00953 } 00954 free(opin_mux_size); 00955 } 00956 00957 
00958 00959 static void 00960 build_bidir_rr_opins(INP int i, 00961 INP int j, 00962 INOUTP t_rr_node * rr_node, 00963 INP t_ivec *** rr_node_indices, 00964 INP int *****opin_to_track_map, 00965 INP int *Fc_out, 00966 INP boolean * rr_edge_done, 00967 INP t_seg_details * seg_details, 00968 INP struct s_grid_tile **grid) 00969 { 00970 00971 int ipin, inode, num_edges, Fc, ofs; 00972 t_type_ptr type; 00973 struct s_linked_edge *edge_list, *next; 00974 00975 /* OPINP edges need to be done at once so let the offset 0 00976 * block do the work. */ 00977 if(grid[i][j].offset > 0) 00978 { 00979 return; 00980 } 00981 00982 type = grid[i][j].type; 00983 Fc = Fc_out[type->index]; 00984 00985 for(ipin = 0; ipin < type->num_pins; ++ipin) 00986 { 00987 /* We only are working with opins so skip non-drivers */ 00988 if(type->class_inf[type->pin_class[ipin]].type != DRIVER) 00989 { 00990 continue; 00991 } 00992 00993 num_edges = 0; 00994 edge_list = NULL; 00995 00996 for(ofs = 0; ofs < type->height; ++ofs) 00997 { 00998 num_edges += 00999 get_bidir_opin_connections(i, j + ofs, ipin, 01000 &edge_list, 01001 opin_to_track_map, Fc, 01002 rr_edge_done, 01003 rr_node_indices, 01004 seg_details); 01005 } 01006 01007 inode = get_rr_node_index(i, j, OPIN, ipin, rr_node_indices); 01008 alloc_and_load_edges_and_switches(rr_node, inode, num_edges, 01009 rr_edge_done, edge_list); 01010 while(edge_list != NULL) { 01011 next = edge_list->next; 01012 free(edge_list); 01013 edge_list = next; 01014 } 01015 } 01016 } 01017 01018 01019 /** Frees all the routing graph data structures, if they have been 01020 * allocated. I use rr_mem_chunk_list_head as a flag to indicate 01021 * whether or not the graph has been allocated -- if it is not NULL, 01022 * a routing graph exists and can be freed. Hence, you can call this 01023 * routine even if you're not sure of whether a rr_graph exists or not. 
01024 */ 01025 void 01026 free_rr_graph(void) 01027 { 01028 int i; 01029 01030 if(rr_mem_chunk_list_head == NULL) /* Nothing to free. */ 01031 return; 01032 01033 free_chunk_memory(rr_mem_chunk_list_head); /* Frees ALL "chunked" data */ 01034 rr_mem_chunk_list_head = NULL; /* No chunks allocated now. */ 01035 chunk_bytes_avail = 0; /* 0 bytes left in current "chunk". */ 01036 chunk_next_avail_mem = NULL; /* No current chunk. */ 01037 01038 /* Before adding any more free calls here, be sure the data is NOT chunk * 01039 * allocated, as ALL the chunk allocated data is already free! */ 01040 01041 free(net_rr_terminals); 01042 01043 for(i = 0; i < num_rr_nodes; i++) { 01044 if(rr_node[i].edges != NULL) { 01045 free(rr_node[i].edges); 01046 } 01047 if(rr_node[i].switches != NULL) { 01048 free(rr_node[i].switches); 01049 } 01050 } 01051 01052 assert(rr_node_indices); 01053 free_rr_node_indices(rr_node_indices); 01054 free(rr_node); 01055 free(rr_indexed_data); 01056 for(i = 0; i < num_blocks; i++) 01057 { 01058 free(rr_blk_source[i]); 01059 } 01060 free(rr_blk_source); 01061 rr_blk_source = NULL; 01062 net_rr_terminals = NULL; 01063 rr_node = NULL; 01064 rr_node_indices = NULL; 01065 rr_indexed_data = NULL; 01066 } 01067 01068 01069 static void 01070 alloc_net_rr_terminals(void) 01071 { 01072 int inet; 01073 01074 net_rr_terminals = (int **)my_malloc(num_nets * sizeof(int *)); 01075 01076 for(inet = 0; inet < num_nets; inet++) 01077 { 01078 net_rr_terminals[inet] = 01079 (int *)my_chunk_malloc((clb_net[inet].num_sinks + 1) * 01080 sizeof(int), &rr_mem_chunk_list_head, 01081 &chunk_bytes_avail, 01082 &chunk_next_avail_mem); 01083 } 01084 } 01085 01086 /** Allocates and loads the net_rr_terminals data structure. For each net 01087 * it stores the rr_node index of the SOURCE of the net and all the SINKs 01088 * of the net. [0..num_nets-1][0..num_pins-1]. Entry [inet][pnum] stores 01089 * the rr index corresponding to the SOURCE (opin) or SINK (ipin) of pnum. 
01090 */ 01091 void 01092 load_net_rr_terminals(t_ivec *** rr_node_indices) 01093 { 01094 01095 int inet, ipin, inode, iblk, i, j, k, node_block_pin, iclass; 01096 t_type_ptr type; 01097 01098 for(inet = 0; inet < num_nets; inet++) 01099 { 01100 for(ipin = 0; ipin <= clb_net[inet].num_sinks; ipin++) 01101 { 01102 iblk = clb_net[inet].node_block[ipin]; 01103 i = block[iblk].x; 01104 j = block[iblk].y; 01105 k = block[iblk].z; 01106 type = block[iblk].type; 01107 01108 /* In the routing graph, each (x, y) location has unique pins on it 01109 * so when there is capacity, blocks are packed and their pin numbers 01110 * are offset to get their actual rr_node */ 01111 node_block_pin = clb_net[inet].node_block_pin[ipin]; 01112 01113 iclass = type->pin_class[node_block_pin]; 01114 01115 inode = get_rr_node_index(i, j, (ipin == 0 ? SOURCE : SINK), /* First pin is driver */ 01116 iclass, rr_node_indices); 01117 net_rr_terminals[inet][ipin] = inode; 01118 } 01119 } 01120 } 01121 01122 /** Saves the rr_node corresponding to each SOURCE and SINK in each CLB 01123 * in the FPGA. Currently only the SOURCE rr_node values are used, and 01124 * they are used only to reserve pins for locally used OPINs in the router. 01125 * [0..num_blocks-1][0..num_class-1]. The values for blocks that are pads 01126 * are NOT valid. 
01127 */ 01128 static void 01129 alloc_and_load_rr_clb_source(t_ivec *** rr_node_indices) 01130 { 01131 01132 int iblk, i, j, iclass, inode; 01133 int class_low, class_high; 01134 t_rr_type rr_type; 01135 t_type_ptr type; 01136 01137 rr_blk_source = (int **)my_malloc(num_blocks * sizeof(int *)); 01138 01139 for(iblk = 0; iblk < num_blocks; iblk++) 01140 { 01141 type = block[iblk].type; 01142 get_class_range_for_block(iblk, &class_low, &class_high); 01143 rr_blk_source[iblk] = 01144 (int *)my_malloc(type->num_class * sizeof(int)); 01145 for(iclass = 0; iclass < type->num_class; iclass++) 01146 { 01147 if(iclass >= class_low && iclass <= class_high) 01148 { 01149 i = block[iblk].x; 01150 j = block[iblk].y; 01151 01152 if(type->class_inf[iclass].type == DRIVER) 01153 rr_type = SOURCE; 01154 else 01155 rr_type = SINK; 01156 01157 inode = 01158 get_rr_node_index(i, j, rr_type, iclass, 01159 rr_node_indices); 01160 rr_blk_source[iblk][iclass] = inode; 01161 } 01162 else 01163 { 01164 rr_blk_source[iblk][iclass] = OPEN; 01165 } 01166 } 01167 } 01168 } 01169 01170 /* Loads IPIN, SINK, SOURCE, and OPIN. 
01171 * Loads IPINP to SINK edges, and SOURCE to OPINP edges 01172 */ 01173 static void 01174 build_rr_sinks_sources(INP int i, 01175 INP int j, 01176 INP t_rr_node * rr_node, 01177 INP t_ivec *** rr_node_indices, 01178 INP int delayless_switch, 01179 INP struct s_grid_tile **grid) 01180 { 01181 01182 int ipin, iclass, inode, pin_num, to_node, num_edges; 01183 int num_class, num_pins; 01184 t_type_ptr type; 01185 struct s_class *class_inf; 01186 int *pin_class; 01187 01188 /* Since we share nodes within a large block, only 01189 * start tile can initialize sinks, sources, and pins */ 01190 if(grid[i][j].offset > 0) 01191 return; 01192 01193 type = grid[i][j].type; 01194 num_class = type->num_class; 01195 class_inf = type->class_inf; 01196 num_pins = type->num_pins; 01197 pin_class = type->pin_class; 01198 01199 /* SINKS and SOURCE to OPINP edges */ 01200 for(iclass = 0; iclass < num_class; iclass++) 01201 { 01202 if(class_inf[iclass].type == DRIVER) 01203 { /* SOURCE */ 01204 inode = 01205 get_rr_node_index(i, j, SOURCE, iclass, 01206 rr_node_indices); 01207 01208 num_edges = class_inf[iclass].num_pins; 01209 rr_node[inode].num_edges = num_edges; 01210 rr_node[inode].edges = 01211 (int *)my_malloc(num_edges * sizeof(int)); 01212 rr_node[inode].switches = 01213 (short *)my_malloc(num_edges * sizeof(short)); 01214 01215 for(ipin = 0; ipin < class_inf[iclass].num_pins; ipin++) 01216 { 01217 pin_num = class_inf[iclass].pinlist[ipin]; 01218 to_node = 01219 get_rr_node_index(i, j, OPIN, pin_num, 01220 rr_node_indices); 01221 rr_node[inode].edges[ipin] = to_node; 01222 rr_node[inode].switches[ipin] = delayless_switch; 01223 01224 ++rr_node[to_node].fan_in; 01225 } 01226 01227 rr_node[inode].cost_index = SOURCE_COST_INDEX; 01228 rr_node[inode].type = SOURCE; 01229 } 01230 else 01231 { /* SINK */ 01232 assert(class_inf[iclass].type == RECEIVER); 01233 inode = 01234 get_rr_node_index(i, j, SINK, iclass, 01235 rr_node_indices); 01236 01237 /* NOTE: To allow route throughs 
through clbs, change the lines below to * 01238 * make an edge from the input SINK to the output SOURCE. Do for just the * 01239 * special case of INPUTS = class 0 and OUTPUTS = class 1 and see what it * 01240 * leads to. If route throughs are allowed, you may want to increase the * 01241 * base cost of OPINs and/or SOURCES so they aren't used excessively. */ 01242 01243 /* Initialize to unconnected to fix values */ 01244 rr_node[inode].num_edges = 0; 01245 rr_node[inode].edges = NULL; 01246 rr_node[inode].switches = NULL; 01247 01248 rr_node[inode].cost_index = SINK_COST_INDEX; 01249 rr_node[inode].type = SINK; 01250 } 01251 01252 /* Things common to both SOURCEs and SINKs. */ 01253 rr_node[inode].capacity = class_inf[iclass].num_pins; 01254 rr_node[inode].occ = 0; 01255 rr_node[inode].xlow = i; 01256 rr_node[inode].xhigh = i; 01257 rr_node[inode].ylow = j; 01258 rr_node[inode].yhigh = j + type->height - 1; 01259 rr_node[inode].R = 0; 01260 rr_node[inode].C = 0; 01261 rr_node[inode].ptc_num = iclass; 01262 rr_node[inode].direction = OPEN; 01263 rr_node[inode].drivers = OPEN; 01264 } 01265 01266 /* Connect IPINS to SINKS and dummy for OPINS */ 01267 for(ipin = 0; ipin < num_pins; ipin++) 01268 { 01269 iclass = pin_class[ipin]; 01270 01271 if(class_inf[iclass].type == RECEIVER) 01272 { 01273 inode = 01274 get_rr_node_index(i, j, IPIN, ipin, rr_node_indices); 01275 to_node = 01276 get_rr_node_index(i, j, SINK, iclass, 01277 rr_node_indices); 01278 01279 rr_node[inode].num_edges = 1; 01280 rr_node[inode].edges = (int *)my_malloc(sizeof(int)); 01281 rr_node[inode].switches = 01282 (short *)my_malloc(sizeof(short)); 01283 01284 rr_node[inode].edges[0] = to_node; 01285 rr_node[inode].switches[0] = delayless_switch; 01286 01287 ++rr_node[to_node].fan_in; 01288 01289 rr_node[inode].cost_index = IPIN_COST_INDEX; 01290 rr_node[inode].type = IPIN; 01291 } 01292 else 01293 { 01294 assert(class_inf[iclass].type == DRIVER); 01295 01296 inode = 01297 get_rr_node_index(i, j, OPIN, 
ipin, rr_node_indices); 01298 01299 rr_node[inode].num_edges = 0; 01300 rr_node[inode].edges = NULL; 01301 01302 rr_node[inode].switches = NULL; 01303 01304 rr_node[inode].cost_index = OPIN_COST_INDEX; 01305 rr_node[inode].type = OPIN; 01306 } 01307 01308 /* Common to both DRIVERs and RECEIVERs */ 01309 rr_node[inode].capacity = 1; 01310 rr_node[inode].occ = 0; 01311 rr_node[inode].xlow = i; 01312 rr_node[inode].xhigh = i; 01313 rr_node[inode].ylow = j; 01314 rr_node[inode].yhigh = j + type->height - 1; 01315 rr_node[inode].C = 0; 01316 rr_node[inode].R = 0; 01317 rr_node[inode].ptc_num = ipin; 01318 rr_node[inode].direction = OPEN; 01319 rr_node[inode].drivers = OPEN; 01320 } 01321 } 01322 01323 01324 /* Loads up all the routing resource nodes in the x-directed channel 01325 * segments starting at (i,j). 01326 */ 01327 static void 01328 build_rr_xchan(INP int i, 01329 INP int j, 01330 INP struct s_ivec ****track_to_ipin_lookup, 01331 INP struct s_ivec ***switch_block_conn, 01332 INP int cost_index_offset, 01333 INP int nodes_per_chan, 01334 INP int *opin_mux_size, 01335 INP short *****sblock_pattern, 01336 INP int Fs_per_side, 01337 INP t_seg_details * seg_details, 01338 INP t_ivec *** rr_node_indices, 01339 INOUTP boolean * rr_edge_done, 01340 INOUTP t_rr_node * rr_node, 01341 INP int wire_to_ipin_switch, 01342 INP enum e_directionality directionality) 01343 { 01344 01345 int itrack, istart, iend, num_edges, inode, length; 01346 struct s_linked_edge *edge_list, *next; 01347 01348 01349 for(itrack = 0; itrack < nodes_per_chan; itrack++) 01350 { 01351 istart = get_seg_start(seg_details, itrack, j, i); 01352 iend = get_seg_end(seg_details, itrack, istart, j, nx); 01353 01354 if(i > istart) 01355 continue; /* Not the start of this segment. */ 01356 01357 edge_list = NULL; 01358 01359 /* First count number of edges and put the edges in a linked list. 
*/ 01360 num_edges = 0; 01361 01362 num_edges += get_track_to_ipins(istart, j, itrack, 01363 &edge_list, 01364 rr_node_indices, 01365 track_to_ipin_lookup, 01366 seg_details, 01367 CHANX, nx, 01368 wire_to_ipin_switch, 01369 directionality); 01370 01371 if(j > 0) 01372 { 01373 num_edges += get_track_to_tracks(j, istart, itrack, CHANX, 01374 j, CHANY, nx, 01375 nodes_per_chan, 01376 opin_mux_size, 01377 Fs_per_side, 01378 sblock_pattern, 01379 &edge_list, 01380 seg_details, 01381 directionality, 01382 rr_node_indices, 01383 rr_edge_done, 01384 switch_block_conn); 01385 } 01386 01387 if(j < ny) 01388 { 01389 num_edges += get_track_to_tracks(j, istart, itrack, CHANX, 01390 j + 1, CHANY, nx, 01391 nodes_per_chan, 01392 opin_mux_size, 01393 Fs_per_side, 01394 sblock_pattern, 01395 &edge_list, 01396 seg_details, 01397 directionality, 01398 rr_node_indices, 01399 rr_edge_done, 01400 switch_block_conn); 01401 } 01402 01403 if(istart > 1) 01404 { 01405 num_edges += get_track_to_tracks(j, istart, itrack, CHANX, 01406 istart - 1, CHANX, nx, 01407 nodes_per_chan, 01408 opin_mux_size, 01409 Fs_per_side, 01410 sblock_pattern, 01411 &edge_list, 01412 seg_details, 01413 directionality, 01414 rr_node_indices, 01415 rr_edge_done, 01416 switch_block_conn); 01417 } 01418 01419 if(iend < nx) 01420 { 01421 num_edges += get_track_to_tracks(j, istart, itrack, CHANX, 01422 iend + 1, CHANX, nx, 01423 nodes_per_chan, 01424 opin_mux_size, 01425 Fs_per_side, 01426 sblock_pattern, 01427 &edge_list, 01428 seg_details, 01429 directionality, 01430 rr_node_indices, 01431 rr_edge_done, 01432 switch_block_conn); 01433 } 01434 01435 01436 inode = get_rr_node_index(i, j, CHANX, itrack, rr_node_indices); 01437 alloc_and_load_edges_and_switches(rr_node, inode, num_edges, 01438 rr_edge_done, edge_list); 01439 01440 while(edge_list != NULL) { 01441 next = edge_list->next; 01442 free(edge_list); 01443 edge_list = next; 01444 } 01445 01446 /* Edge arrays have now been built up. Do everything else. 
*/ 01447 rr_node[inode].cost_index = 01448 cost_index_offset + seg_details[itrack].index; 01449 rr_node[inode].occ = 0; 01450 01451 rr_node[inode].capacity = 1; /* GLOBAL routing handled elsewhere */ 01452 01453 rr_node[inode].xlow = istart; 01454 rr_node[inode].xhigh = iend; 01455 rr_node[inode].ylow = j; 01456 rr_node[inode].yhigh = j; 01457 01458 length = iend - istart + 1; 01459 rr_node[inode].R = length * seg_details[itrack].Rmetal; 01460 rr_node[inode].C = length * seg_details[itrack].Cmetal; 01461 01462 rr_node[inode].ptc_num = itrack; 01463 rr_node[inode].type = CHANX; 01464 rr_node[inode].direction = seg_details[itrack].direction; 01465 rr_node[inode].drivers = seg_details[itrack].drivers; 01466 } 01467 } 01468 01469 01470 01471 /** Loads up all the routing resource nodes in the y-directed channel 01472 * segments starting at (i,j). 01473 */ 01474 static void 01475 build_rr_ychan(INP int i, 01476 INP int j, 01477 INP struct s_ivec ****track_to_ipin_lookup, 01478 INP struct s_ivec ***switch_block_conn, 01479 INP int cost_index_offset, 01480 INP int nodes_per_chan, 01481 INP int *opin_mux_size, 01482 INP short *****sblock_pattern, 01483 INP int Fs_per_side, 01484 INP t_seg_details * seg_details, 01485 INP t_ivec *** rr_node_indices, 01486 INP boolean * rr_edge_done, 01487 INOUTP t_rr_node * rr_node, 01488 INP int wire_to_ipin_switch, 01489 INP enum e_directionality directionality) 01490 { 01491 01492 int itrack, istart, iend, num_edges, inode, length; 01493 struct s_linked_edge *edge_list, *next; 01494 01495 01496 for(itrack = 0; itrack < nodes_per_chan; itrack++) 01497 { 01498 istart = get_seg_start(seg_details, itrack, i, j); 01499 iend = get_seg_end(seg_details, itrack, istart, i, ny); 01500 01501 if(j > istart) 01502 continue; /* Not the start of this segment. */ 01503 01504 edge_list = NULL; 01505 01506 /* First count number of edges and put the edges in a linked list. 
*/ 01507 num_edges = 0; 01508 01509 num_edges += get_track_to_ipins(istart, i, itrack, 01510 &edge_list, 01511 rr_node_indices, 01512 track_to_ipin_lookup, 01513 seg_details, 01514 CHANY, ny, 01515 wire_to_ipin_switch, 01516 directionality); 01517 01518 if(i > 0) 01519 { 01520 num_edges += get_track_to_tracks(i, istart, itrack, CHANY, 01521 i, CHANX, ny, 01522 nodes_per_chan, 01523 opin_mux_size, 01524 Fs_per_side, 01525 sblock_pattern, 01526 &edge_list, 01527 seg_details, 01528 directionality, 01529 rr_node_indices, 01530 rr_edge_done, 01531 switch_block_conn); 01532 } 01533 01534 if(i < nx) 01535 { 01536 num_edges += get_track_to_tracks(i, istart, itrack, CHANY, 01537 i + 1, CHANX, ny, 01538 nodes_per_chan, 01539 opin_mux_size, 01540 Fs_per_side, 01541 sblock_pattern, 01542 &edge_list, 01543 seg_details, 01544 directionality, 01545 rr_node_indices, 01546 rr_edge_done, 01547 switch_block_conn); 01548 } 01549 01550 if(istart > 1) 01551 { 01552 num_edges += get_track_to_tracks(i, istart, itrack, CHANY, 01553 istart - 1, CHANY, ny, 01554 nodes_per_chan, 01555 opin_mux_size, 01556 Fs_per_side, 01557 sblock_pattern, 01558 &edge_list, 01559 seg_details, 01560 directionality, 01561 rr_node_indices, 01562 rr_edge_done, 01563 switch_block_conn); 01564 } 01565 01566 if(iend < ny) 01567 { 01568 num_edges += get_track_to_tracks(i, istart, itrack, CHANY, 01569 iend + 1, CHANY, ny, 01570 nodes_per_chan, 01571 opin_mux_size, 01572 Fs_per_side, 01573 sblock_pattern, 01574 &edge_list, 01575 seg_details, 01576 directionality, 01577 rr_node_indices, 01578 rr_edge_done, 01579 switch_block_conn); 01580 } 01581 01582 01583 inode = get_rr_node_index(i, j, CHANY, itrack, rr_node_indices); 01584 alloc_and_load_edges_and_switches(rr_node, inode, num_edges, 01585 rr_edge_done, edge_list); 01586 01587 while(edge_list != NULL) { 01588 next = edge_list->next; 01589 free(edge_list); 01590 edge_list = next; 01591 } 01592 01593 /* Edge arrays have now been built up. Do everything else. 
*/ 01594 rr_node[inode].cost_index = 01595 cost_index_offset + seg_details[itrack].index; 01596 rr_node[inode].occ = 0; 01597 01598 rr_node[inode].capacity = 1; /* GLOBAL routing handled elsewhere */ 01599 01600 rr_node[inode].xlow = i; 01601 rr_node[inode].xhigh = i; 01602 rr_node[inode].ylow = istart; 01603 rr_node[inode].yhigh = iend; 01604 01605 length = iend - istart + 1; 01606 rr_node[inode].R = length * seg_details[itrack].Rmetal; 01607 rr_node[inode].C = length * seg_details[itrack].Cmetal; 01608 01609 rr_node[inode].ptc_num = itrack; 01610 rr_node[inode].type = CHANY; 01611 rr_node[inode].direction = seg_details[itrack].direction; 01612 rr_node[inode].drivers = seg_details[itrack].drivers; 01613 } 01614 } 01615 01616 void 01617 watch_edges(int inode, 01618 t_linked_edge * edge_list_head) 01619 { 01620 t_linked_edge *list_ptr; 01621 int i, to_node; 01622 01623 list_ptr = edge_list_head; 01624 i = 0; 01625 01626 printf("!!! Watching Node %d !!!!\n", inode); 01627 print_rr_node(stdout, rr_node, inode); 01628 printf("Currently connects to: \n"); 01629 while(list_ptr != NULL) 01630 { 01631 to_node = list_ptr->edge; 01632 print_rr_node(stdout, rr_node, to_node); 01633 list_ptr = list_ptr->next; 01634 i++; 01635 } 01636 } 01637 01638 /** Sets up all the edge related information for rr_node inode (num_edges, 01639 * the edges array and the switches array). The edge_list_head points to 01640 * a list of the num_edges edges and switches to put in the arrays. This 01641 * linked list is freed by this routine. This routine also resets the 01642 * rr_edge_done array for the next rr_node (i.e. set it so that no edges 01643 * are marked as having been seen before). 
01644 */ 01645 void 01646 alloc_and_load_edges_and_switches(INP t_rr_node * rr_node, 01647 INP int inode, 01648 INP int num_edges, 01649 INOUTP boolean * rr_edge_done, 01650 INP t_linked_edge * edge_list_head) 01651 { 01652 01653 t_linked_edge *list_ptr, *prev_ptr; 01654 int i; 01655 01656 /* Check we aren't overwriting edges */ 01657 assert(rr_node[inode].num_edges < 1); 01658 assert(NULL == rr_node[inode].edges); 01659 assert(NULL == rr_node[inode].switches); 01660 01661 rr_node[inode].num_edges = num_edges; 01662 rr_node[inode].edges = (int *)my_malloc(num_edges * sizeof(int)); 01663 rr_node[inode].switches = (short *)my_malloc(num_edges * sizeof(short)); 01664 01665 i = 0; 01666 list_ptr = edge_list_head; 01667 while(list_ptr && (i < num_edges)) 01668 { 01669 rr_node[inode].edges[i] = list_ptr->edge; 01670 rr_node[inode].switches[i] = list_ptr->iswitch; 01671 01672 ++rr_node[list_ptr->edge].fan_in; 01673 01674 /* Unmark the edge since we are done considering fanout from node. */ 01675 rr_edge_done[list_ptr->edge] = FALSE; 01676 01677 prev_ptr = list_ptr; 01678 list_ptr = list_ptr->next; 01679 ++i; 01680 } 01681 assert(list_ptr == NULL); 01682 assert(i == num_edges); 01683 } 01684 01685 01686 01687 01688 01689 01690 01691 static int **** 01692 alloc_and_load_pin_to_track_map(INP enum e_pin_type pin_type, 01693 INP int nodes_per_chan, 01694 INP int Fc, 01695 INP t_type_ptr Type, 01696 INP boolean perturb_switch_pattern, 01697 INP enum e_directionality directionality) 01698 { 01699 01700 int **num_dir; /* [0..height][0..3] Number of *physical* pins on each side. */ 01701 int ***dir_list; /* [0..height][0..3][0..num_pins-1] list of pins of correct type * 01702 * * on each side. 
Max possible space alloced for simplicity */ 01703 01704 int i, j, k, iside, ipin, iclass, num_phys_pins, pindex, ioff; 01705 int *pin_num_ordering, *side_ordering, *offset_ordering; 01706 int **num_done_per_dir; /* [0..height][0..3] */ 01707 int ****tracks_connected_to_pin; /* [0..num_pins-1][0..height][0..3][0..Fc-1] */ 01708 01709 /* NB: This wastes some space. Could set tracks_..._pin[ipin][ioff][iside] = 01710 * NULL if there is no pin on that side, or that pin is of the wrong type. 01711 * Probably not enough memory to worry about, esp. as it's temporary. 01712 * If pin ipin on side iside does not exist or is of the wrong type, 01713 * tracks_connected_to_pin[ipin][iside][0] = OPEN. */ 01714 01715 if(Type->num_pins < 1) 01716 { 01717 return NULL; 01718 } 01719 01720 tracks_connected_to_pin = (int ****) 01721 alloc_matrix4(0, Type->num_pins - 1, 01722 0, Type->height - 1, 0, 3, 0, Fc - 1, sizeof(int)); 01723 01724 for(ipin = 0; ipin < Type->num_pins; ipin++) 01725 { 01726 for(ioff = 0; ioff < Type->height; ioff++) 01727 { 01728 for(iside = 0; iside < 4; iside++) 01729 { 01730 for(i = 0; i < Fc; ++i) 01731 { 01732 tracks_connected_to_pin[ipin][ioff][iside][i] = OPEN; /* Unconnected. */ 01733 } 01734 } 01735 } 01736 } 01737 01738 num_dir = (int **)alloc_matrix(0, Type->height - 1, 0, 3, sizeof(int)); 01739 dir_list = 01740 (int ***)alloc_matrix3(0, Type->height - 1, 0, 3, 0, 01741 Type->num_pins - 1, sizeof(int)); 01742 01743 /* Defensive coding. Try to crash hard if I use an unset entry. 
*/ 01744 for(i = 0; i < Type->height; i++) 01745 for(j = 0; j < 4; j++) 01746 for(k = 0; k < Type->num_pins; k++) 01747 dir_list[i][j][k] = (-1); 01748 01749 for(i = 0; i < Type->height; i++) 01750 for(j = 0; j < 4; j++) 01751 num_dir[i][j] = 0; 01752 01753 for(ipin = 0; ipin < Type->num_pins; ipin++) 01754 { 01755 iclass = Type->pin_class[ipin]; 01756 if(Type->class_inf[iclass].type != pin_type) /* Doing either ipins OR opins */ 01757 continue; 01758 01759 /* Pins connecting only to global resources get no switches -> keeps the * 01760 * area model accurate. */ 01761 01762 if(Type->is_global_pin[ipin]) 01763 continue; 01764 for(ioff = 0; ioff < Type->height; ioff++) 01765 { 01766 for(iside = 0; iside < 4; iside++) 01767 { 01768 if(Type->pinloc[ioff][iside][ipin] == 1) 01769 { 01770 dir_list[ioff][iside][num_dir[ioff] 01771 [iside]] = ipin; 01772 num_dir[ioff][iside]++; 01773 } 01774 } 01775 } 01776 } 01777 01778 num_phys_pins = 0; 01779 for(ioff = 0; ioff < Type->height; ioff++) 01780 { 01781 for(iside = 0; iside < 4; iside++) 01782 num_phys_pins += num_dir[ioff][iside]; /* Num. physical pins per type */ 01783 } 01784 num_done_per_dir = 01785 (int **)alloc_matrix(0, Type->height - 1, 0, 3, sizeof(int)); 01786 for(ioff = 0; ioff < Type->height; ioff++) 01787 { 01788 for(iside = 0; iside < 4; iside++) 01789 { 01790 num_done_per_dir[ioff][iside] = 0; 01791 } 01792 } 01793 pin_num_ordering = (int *)my_malloc(num_phys_pins * sizeof(int)); 01794 side_ordering = (int *)my_malloc(num_phys_pins * sizeof(int)); 01795 offset_ordering = (int *)my_malloc(num_phys_pins * sizeof(int)); 01796 01797 /* Connection block I use distributes pins evenly across the tracks * 01798 * of ALL sides of the clb at once. Ensures that each pin connects * 01799 * to spaced out tracks in its connection block, and that the other * 01800 * pins (potentially in other C blocks) connect to the remaining tracks * 01801 * first. 
Doesn't matter for large Fc, but should make a fairly * 01802 * good low Fc block that leverages the fact that usually lots of pins * 01803 * are logically equivalent. */ 01804 01805 iside = LEFT; 01806 ioff = Type->height - 1; 01807 ipin = 0; 01808 pindex = -1; 01809 01810 while(ipin < num_phys_pins) 01811 { 01812 if(iside == TOP) 01813 { 01814 iside = RIGHT; 01815 } 01816 else if(iside == RIGHT) 01817 { 01818 if(ioff <= 0) 01819 { 01820 iside = BOTTOM; 01821 } 01822 else 01823 { 01824 ioff--; 01825 } 01826 } 01827 else if(iside == BOTTOM) 01828 { 01829 iside = LEFT; 01830 } 01831 else 01832 { 01833 assert(iside == LEFT); 01834 if(ioff >= Type->height - 1) 01835 { 01836 pindex++; 01837 iside = TOP; 01838 } 01839 else 01840 { 01841 ioff++; 01842 } 01843 } 01844 01845 assert(pindex < num_phys_pins); /* Number of physical pins bounds number of logical pins */ 01846 01847 if(num_done_per_dir[ioff][iside] >= num_dir[ioff][iside]) 01848 continue; 01849 pin_num_ordering[ipin] = dir_list[ioff][iside][pindex]; 01850 side_ordering[ipin] = iside; 01851 offset_ordering[ipin] = ioff; 01852 assert(Type->pinloc[ioff][iside][dir_list[ioff][iside][pindex]]); 01853 num_done_per_dir[ioff][iside]++; 01854 ipin++; 01855 } 01856 01857 if(perturb_switch_pattern) 01858 { 01859 load_perturbed_switch_pattern(Type, 01860 tracks_connected_to_pin, 01861 num_phys_pins, 01862 pin_num_ordering, 01863 side_ordering, 01864 offset_ordering, 01865 nodes_per_chan, Fc, directionality); 01866 } 01867 else 01868 { 01869 load_uniform_switch_pattern(Type, 01870 tracks_connected_to_pin, 01871 num_phys_pins, 01872 pin_num_ordering, 01873 side_ordering, 01874 offset_ordering, 01875 nodes_per_chan, Fc, directionality); 01876 } 01877 check_all_tracks_reach_pins(Type, tracks_connected_to_pin, 01878 nodes_per_chan, Fc, pin_type); 01879 01880 /* Free all temporary storage. 
*/ 01881 free_matrix(num_dir, 0, Type->height - 1, 0, sizeof(int)); 01882 free_matrix3(dir_list, 0, Type->height - 1, 0, 3, 0, sizeof(int)); 01883 free_matrix(num_done_per_dir, 0, Type->height - 1, 0, sizeof(int)); 01884 free(pin_num_ordering); 01885 free(side_ordering); 01886 free(offset_ordering); 01887 01888 return tracks_connected_to_pin; 01889 } 01890 01891 01892 01893 /** Loads the tracks_connected_to_pin array with an even distribution of 01894 * switches across the tracks for each pin. For example, each pin connects 01895 * to every 4.3rd track in a channel, with exactly which tracks a pin 01896 * connects to staggered from pin to pin. 01897 */ 01898 static void 01899 load_uniform_switch_pattern(INP t_type_ptr type, 01900 INOUTP int ****tracks_connected_to_pin, 01901 INP int num_phys_pins, 01902 INP int *pin_num_ordering, 01903 INP int *side_ordering, 01904 INP int *offset_ordering, 01905 INP int nodes_per_chan, 01906 INP int Fc, 01907 enum e_directionality directionality) 01908 { 01909 01910 int i, j, ipin, iside, ioff, itrack, k; 01911 float f_track, fc_step; 01912 int group_size; 01913 float step_size; 01914 01915 /* Uni-directional drive is implemented to ensure no directional bias and this means 01916 * two important comments noted below */ 01917 /* 1. Spacing should be (W/2)/(Fc/2), and step_size should be spacing/(num_phys_pins), 01918 * and lay down 2 switches on an adjacent pair of tracks at a time to ensure 01919 * no directional bias. Basically, treat W (even) as W/2 pairs of tracks, and 01920 * assign switches to a pair at a time. Can do this because W is guaranteed to 01921 * be even-numbered; however same approach cannot be applied to Fc_out pattern 01922 * when L > 1 and W <> 2L multiple. 01923 * 01924 * 2. This generic pattern should be considered the tileable physical layout, 01925 * meaning all track # here are physical #'s, 01926 * so later must use vpr_to_phy conversion to find actual logical #'s to connect. 
01927 * This also means I will not use get_output_block_companion_track to ensure 01928 * no bias, since that describes a logical # -> that would confuse people. */ 01929 01930 step_size = (float)nodes_per_chan / (float)(Fc * num_phys_pins); 01931 01932 if(directionality == BI_DIRECTIONAL) 01933 { 01934 group_size = 1; 01935 } 01936 else 01937 { 01938 assert(directionality == UNI_DIRECTIONAL); 01939 group_size = 2; 01940 } 01941 01942 assert((nodes_per_chan % group_size == 0) && (Fc % group_size == 0)); 01943 01944 fc_step = (float)nodes_per_chan / (float)Fc; 01945 01946 for(i = 0; i < num_phys_pins; i++) 01947 { 01948 ipin = pin_num_ordering[i]; 01949 iside = side_ordering[i]; 01950 ioff = offset_ordering[i]; 01951 01952 /* Bi-directional treats each track separately, uni-directional works with pairs of tracks */ 01953 for(j = 0; j < (Fc / group_size); j++) 01954 { 01955 f_track = (i * step_size) + (j * fc_step); 01956 itrack = ((int)f_track) * group_size; 01957 01958 /* Catch possible floating point round error */ 01959 itrack = min(itrack, nodes_per_chan - group_size); 01960 01961 /* Assign the group of tracks for the Fc pattern */ 01962 for(k = 0; k < group_size; ++k) 01963 { 01964 tracks_connected_to_pin[ipin][ioff][iside] 01965 [group_size * j + k] = itrack + k; 01966 } 01967 } 01968 } 01969 } 01970 01971 /** Loads the tracks_connected_to_pin array with an unevenly distributed 01972 * set of switches across the channel. This is done for inputs when 01973 * Fc_input = Fc_output to avoid creating "pin domains" -- certain output 01974 * pins being able to talk only to certain input pins because their switch 01975 * patterns exactly line up. Distribute Fc/2 + 1 switches over half the 01976 * channel and Fc/2 - 1 switches over the other half to make the switch 01977 * pattern different from the uniform one of the outputs. 
Also, have half 01978 * the pins put the "dense" part of their connections in the first half of 01979 * the channel and the other half put the "dense" part in the second half, 01980 * to make sure each track can connect to about the same number of ipins. 01981 */ 01982 static void 01983 load_perturbed_switch_pattern(INP t_type_ptr type, 01984 INOUTP int ****tracks_connected_to_pin, 01985 INP int num_phys_pins, 01986 INP int *pin_num_ordering, 01987 INP int *side_ordering, 01988 INP int *offset_ordering, 01989 INP int nodes_per_chan, 01990 INP int Fc, 01991 enum e_directionality directionality) 01992 { 01993 01994 int i, j, ipin, iside, itrack, ihalf, iconn, ioff; 01995 int Fc_dense, Fc_sparse, Fc_half[2]; 01996 float f_track, spacing_dense, spacing_sparse, spacing[2]; 01997 float step_size; 01998 01999 assert(directionality == BI_DIRECTIONAL); 02000 02001 step_size = (float)nodes_per_chan / (float)(Fc * num_phys_pins); 02002 02003 Fc_dense = (Fc / 2) + 1; 02004 Fc_sparse = Fc - Fc_dense; /* Works for even or odd Fc */ 02005 02006 spacing_dense = (float)nodes_per_chan / (float)(2 * Fc_dense); 02007 spacing_sparse = (float)nodes_per_chan / (float)(2 * Fc_sparse); 02008 02009 for(i = 0; i < num_phys_pins; i++) 02010 { 02011 ipin = pin_num_ordering[i]; 02012 iside = side_ordering[i]; 02013 ioff = offset_ordering[i]; 02014 02015 /* Flip every pin to balance switch density */ 02016 spacing[i % 2] = spacing_dense; 02017 Fc_half[i % 2] = Fc_dense; 02018 spacing[(i + 1) % 2] = spacing_sparse; 02019 Fc_half[(i + 1) % 2] = Fc_sparse; 02020 02021 f_track = i * step_size; /* Start point. Staggered from pin to pin */ 02022 iconn = 0; 02023 02024 for(ihalf = 0; ihalf < 2; ihalf++) 02025 { /* For both dense and sparse halves. */ 02026 for(j = 0; j < Fc_half[ihalf]; ++j) 02027 { 02028 itrack = (int)f_track; 02029 02030 /* Can occasionally get wraparound due to floating point rounding. 
02031 This is okay because the starting position > 0 when this occurs 02032 so connection is valid and fine */ 02033 itrack = itrack % nodes_per_chan; 02034 tracks_connected_to_pin[ipin][ioff][iside][iconn] 02035 = itrack; 02036 02037 f_track += spacing[ihalf]; 02038 iconn++; 02039 } 02040 } 02041 } /* End for all physical pins. */ 02042 } 02043 02044 02045 /** Checks that all tracks can be reached by some pin. */ 02046 static void 02047 check_all_tracks_reach_pins(t_type_ptr type, 02048 int ****tracks_connected_to_pin, 02049 int nodes_per_chan, 02050 int Fc, 02051 enum e_pin_type ipin_or_opin) 02052 { 02053 int iconn, iside, itrack, ipin, ioff; 02054 int *num_conns_to_track; /* [0..nodes_per_chan-1] */ 02055 02056 assert(nodes_per_chan > 0); 02057 02058 num_conns_to_track = (int *)my_calloc(nodes_per_chan, sizeof(int)); 02059 02060 for(ipin = 0; ipin < type->num_pins; ipin++) 02061 { 02062 for(ioff = 0; ioff < type->height; ioff++) 02063 { 02064 for(iside = 0; iside < 4; iside++) 02065 { 02066 if(tracks_connected_to_pin[ipin][ioff][iside][0] 02067 != OPEN) 02068 { /* Pin exists */ 02069 for(iconn = 0; iconn < Fc; iconn++) 02070 { 02071 itrack = 02072 tracks_connected_to_pin[ipin] 02073 [ioff][iside][iconn]; 02074 num_conns_to_track[itrack]++; 02075 } 02076 } 02077 } 02078 } 02079 } 02080 02081 for(itrack = 0; itrack < nodes_per_chan; itrack++) 02082 { 02083 if(num_conns_to_track[itrack] <= 0) 02084 { 02085 printf 02086 ("Warning (check_all_tracks_reach_pins): track %d does not \n" 02087 "\tconnect to any CLB ", itrack); 02088 02089 if(ipin_or_opin == DRIVER) 02090 printf("OPINs.\n"); 02091 else 02092 printf("IPINs.\n"); 02093 } 02094 } 02095 02096 free(num_conns_to_track); 02097 } 02098 02099 /** Allocates and loads the track to ipin lookup for each physical grid type. This 02100 * is the same information as the ipin_to_track map but accessed in a different way. 
02101 */ 02102 static struct s_ivec *** 02103 alloc_and_load_track_to_pin_lookup(INP int ****pin_to_track_map, 02104 INP int Fc, 02105 INP int height, 02106 INP int num_pins, 02107 INP int nodes_per_chan) 02108 { 02109 int ipin, iside, itrack, iconn, ioff, pin_counter; 02110 struct s_ivec ***track_to_pin_lookup; 02111 02112 /* [0..nodes_per_chan-1][0..height][0..3]. For each track number it stores a vector 02113 * for each of the four sides. x-directed channels will use the TOP and 02114 * BOTTOM vectors to figure out what clb input pins they connect to above 02115 * and below them, respectively, while y-directed channels use the LEFT 02116 * and RIGHT vectors. Each vector contains an nelem field saying how many 02117 * ipins it connects to. The list[0..nelem-1] array then gives the pin 02118 * numbers. */ 02119 02120 /* Note that a clb pin that connects to a channel on its RIGHT means that * 02121 * that channel connects to a clb pin on its LEFT. The convention used * 02122 * here is always in the perspective of the CLB */ 02123 02124 if(num_pins < 1) 02125 { 02126 return NULL; 02127 } 02128 02129 /* Alloc and zero the the lookup table */ 02130 track_to_pin_lookup = 02131 (struct s_ivec ***)alloc_matrix3(0, nodes_per_chan - 1, 0, 02132 height - 1, 0, 3, 02133 sizeof(struct s_ivec)); 02134 for(itrack = 0; itrack < nodes_per_chan; itrack++) 02135 { 02136 for(ioff = 0; ioff < height; ioff++) 02137 { 02138 for(iside = 0; iside < 4; iside++) 02139 { 02140 track_to_pin_lookup[itrack][ioff][iside].nelem = 02141 0; 02142 track_to_pin_lookup[itrack][ioff][iside].list = 02143 NULL; 02144 } 02145 } 02146 } 02147 02148 /* Counting pass. 
*/ 02149 for(ipin = 0; ipin < num_pins; ipin++) 02150 { 02151 for(ioff = 0; ioff < height; ioff++) 02152 { 02153 for(iside = 0; iside < 4; iside++) 02154 { 02155 if(pin_to_track_map[ipin][ioff][iside][0] == OPEN) 02156 continue; 02157 02158 for(iconn = 0; iconn < Fc; iconn++) 02159 { 02160 itrack = 02161 pin_to_track_map[ipin][ioff][iside] 02162 [iconn]; 02163 track_to_pin_lookup[itrack][ioff][iside]. 02164 nelem++; 02165 } 02166 } 02167 } 02168 } 02169 02170 /* Allocate space. */ 02171 for(itrack = 0; itrack < nodes_per_chan; itrack++) 02172 { 02173 for(ioff = 0; ioff < height; ioff++) 02174 { 02175 for(iside = 0; iside < 4; iside++) 02176 { 02177 track_to_pin_lookup[itrack][ioff][iside].list = NULL; /* Defensive code */ 02178 if(track_to_pin_lookup[itrack][ioff][iside]. 02179 nelem != 0) 02180 { 02181 track_to_pin_lookup[itrack][ioff][iside]. 02182 list = 02183 (int *) 02184 my_malloc(track_to_pin_lookup[itrack] 02185 [ioff][iside].nelem * 02186 sizeof(int)); 02187 track_to_pin_lookup[itrack][ioff][iside]. 02188 nelem = 0; 02189 } 02190 } 02191 } 02192 } 02193 02194 /* Loading pass. */ 02195 for(ipin = 0; ipin < num_pins; ipin++) 02196 { 02197 for(ioff = 0; ioff < height; ioff++) 02198 { 02199 for(iside = 0; iside < 4; iside++) 02200 { 02201 if(pin_to_track_map[ipin][ioff][iside][0] == OPEN) 02202 continue; 02203 02204 for(iconn = 0; iconn < Fc; iconn++) 02205 { 02206 itrack = 02207 pin_to_track_map[ipin][ioff][iside] 02208 [iconn]; 02209 pin_counter = 02210 track_to_pin_lookup[itrack][ioff] 02211 [iside].nelem; 02212 track_to_pin_lookup[itrack][ioff][iside]. 02213 list[pin_counter] = ipin; 02214 track_to_pin_lookup[itrack][ioff][iside]. 02215 nelem++; 02216 } 02217 } 02218 } 02219 } 02220 02221 return track_to_pin_lookup; 02222 } 02223 02224 /** A utility routine to dump the contents of the routing resource graph 02225 * (everything -- connectivity, occupancy, cost, etc.) into a file. Used 02226 * only for debugging. 
02227 */ 02228 void 02229 dump_rr_graph(INP const char *file_name) 02230 { 02231 02232 int inode; 02233 FILE *fp; 02234 02235 fp = my_fopen(file_name, "w", 0); 02236 02237 for(inode = 0; inode < num_rr_nodes; inode++) 02238 { 02239 print_rr_node(fp, rr_node, inode); 02240 fprintf(fp, "\n"); 02241 } 02242 02243 #if 0 02244 fprintf(fp, "\n\n%d rr_indexed_data entries.\n\n", num_rr_indexed_data); 02245 02246 for(index = 0; index < num_rr_indexed_data; index++) 02247 { 02248 print_rr_indexed_data(fp, index); 02249 fprintf(fp, "\n"); 02250 } 02251 #endif 02252 02253 fclose(fp); 02254 } 02255 02256 02257 /** Prints all the data about node inode to file fp. */ 02258 void 02259 print_rr_node(FILE * fp, 02260 t_rr_node * rr_node, 02261 int inode) 02262 { 02263 02264 static const char *name_type[] = { 02265 "SOURCE", "SINK", "IPIN", "OPIN", "CHANX", "CHANY", "INTRA_CLUSTER_EDGE" 02266 }; 02267 static const char *direction_name[] = { 02268 "OPEN", "INC_DIRECTION", "DEC_DIRECTION", "BI_DIRECTION" 02269 }; 02270 static const char *drivers_name[] = { 02271 "OPEN", "MULTI_BUFFER", "SINGLE" 02272 }; 02273 02274 t_rr_type rr_type; 02275 int iconn; 02276 02277 rr_type = rr_node[inode].type; 02278 02279 /* Make sure we don't overrun const arrays */ 02280 assert(rr_type < (sizeof(name_type) / sizeof(char *))); 02281 assert((rr_node[inode].direction + 1) < 02282 (sizeof(direction_name) / sizeof(char *))); 02283 assert((rr_node[inode].drivers + 1) < 02284 (sizeof(drivers_name) / sizeof(char *))); 02285 02286 fprintf(fp, "Node: %d %s ", inode, name_type[rr_type]); 02287 if((rr_node[inode].xlow == rr_node[inode].xhigh) && 02288 (rr_node[inode].ylow == rr_node[inode].yhigh)) 02289 { 02290 fprintf(fp, "(%d, %d) ", rr_node[inode].xlow, 02291 rr_node[inode].ylow); 02292 } 02293 else 02294 { 02295 fprintf(fp, "(%d, %d) to (%d, %d) ", rr_node[inode].xlow, 02296 rr_node[inode].ylow, rr_node[inode].xhigh, 02297 rr_node[inode].yhigh); 02298 } 02299 fprintf(fp, "Ptc_num: %d ", 
rr_node[inode].ptc_num); 02300 fprintf(fp, "Direction: %s ", 02301 direction_name[rr_node[inode].direction + 1]); 02302 fprintf(fp, "Drivers: %s ", drivers_name[rr_node[inode].drivers + 1]); 02303 fprintf(fp, "\n"); 02304 02305 fprintf(fp, "%d edge(s):", rr_node[inode].num_edges); 02306 for(iconn = 0; iconn < rr_node[inode].num_edges; iconn++) 02307 fprintf(fp, " %d", rr_node[inode].edges[iconn]); 02308 fprintf(fp, "\n"); 02309 02310 fprintf(fp, "Switch types:"); 02311 for(iconn = 0; iconn < rr_node[inode].num_edges; iconn++) 02312 fprintf(fp, " %d", rr_node[inode].switches[iconn]); 02313 fprintf(fp, "\n"); 02314 02315 fprintf(fp, "Occ: %d Capacity: %d\n", rr_node[inode].occ, 02316 rr_node[inode].capacity); 02317 if(rr_type != INTRA_CLUSTER_EDGE) { 02318 fprintf(fp, "R: %g C: %g\n", rr_node[inode].R, rr_node[inode].C); 02319 } 02320 fprintf(fp, "Cost_index: %d\n", rr_node[inode].cost_index); 02321 } 02322 02323 02324 /** Prints all the rr_indexed_data of index to file fp. */ 02325 void 02326 print_rr_indexed_data(FILE * fp, 02327 int index) 02328 { 02329 02330 fprintf(fp, "Index: %d\n", index); 02331 02332 fprintf(fp, "ortho_cost_index: %d ", 02333 rr_indexed_data[index].ortho_cost_index); 02334 fprintf(fp, "base_cost: %g ", rr_indexed_data[index].saved_base_cost); 02335 fprintf(fp, "saved_base_cost: %g\n", 02336 rr_indexed_data[index].saved_base_cost); 02337 02338 fprintf(fp, "Seg_index: %d ", rr_indexed_data[index].seg_index); 02339 fprintf(fp, "inv_length: %g\n", rr_indexed_data[index].inv_length); 02340 02341 fprintf(fp, "T_linear: %g ", rr_indexed_data[index].T_linear); 02342 fprintf(fp, "T_quadratic: %g ", rr_indexed_data[index].T_quadratic); 02343 fprintf(fp, "C_load: %g\n", rr_indexed_data[index].C_load); 02344 } 02345 02346 02347 /** This routine returns a list of the opins rr_nodes on each 02348 * side/offset of the block. You must free the result with 02349 * free_matrix. 
 */

/* Builds OPIN -> wire-mux edges for the base block at grid (i, j) in a
 * unidirectional routing architecture.  For every non-global DRIVER pin it
 * visits each (height offset, side) location the pin appears at, maps that
 * location to the bordering channel segment, and collects candidate edges
 * via get_unidir_opin_connections(); the edges are then committed with
 * alloc_and_load_edges_and_switches().  *Fc_clipped is set TRUE if any
 * channel had fewer muxes than the requested Fc_out. */
static void
build_unidir_rr_opins(INP int i,
		      INP int j,
		      INP struct s_grid_tile **grid,
		      INP int *Fc_out,
		      INP int nodes_per_chan,
		      INP t_seg_details * seg_details,
		      INOUTP int **Fc_xofs,
		      INOUTP int **Fc_yofs,
		      INOUTP t_rr_node * rr_node,
		      INOUTP boolean * rr_edge_done,
		      OUTP boolean * Fc_clipped,
		      INP t_ivec *** rr_node_indices)
{
    t_type_ptr type;
    int ipin, iclass, ofs, chan, seg, max_len, inode;
    enum e_side side;
    t_rr_type chan_type;
    t_linked_edge *edge_list = NULL, *next;
    boolean clipped, vert, pos_dir;
    int num_edges;
    int **Fc_ofs;

    *Fc_clipped = FALSE;

    /* Only the base block of a set should use this function */
    if(grid[i][j].offset > 0)
	{
	    return;
	}

    type = grid[i][j].type;

    /* Go through each pin and find its fanout. */
    for(ipin = 0; ipin < type->num_pins; ++ipin)
	{
	    /* Skip global pins and ipins */
	    iclass = type->pin_class[ipin];
	    if(type->class_inf[iclass].type != DRIVER)
		{
		    continue;
		}
	    if(type->is_global_pin[ipin])
		{
		    continue;
		}

	    num_edges = 0;
	    edge_list = NULL;
	    /* A tall block may expose the same pin at several height offsets
	     * and on any of its four sides; gather edges from all of them. */
	    for(ofs = 0; ofs < type->height; ++ofs)
		{
		    for(side = 0; side < 4; ++side)
			{
			    /* Can't do anything if pin isn't at this location */
			    if(0 == type->pinloc[ofs][side][ipin])
				{
				    continue;
				}

			    /* Figure out the chan seg at that side.
			     * side is the side of the logic or io block. */
			    vert = ((side == TOP) || (side == BOTTOM));
			    pos_dir = ((side == TOP) || (side == RIGHT));
			    chan_type = (vert ? CHANX : CHANY);
			    chan = (vert ? (j + ofs) : i);
			    seg = (vert ? i : (j + ofs));
			    max_len = (vert ? nx : ny);
			    Fc_ofs = (vert ? Fc_xofs : Fc_yofs);
			    /* BOTTOM/LEFT pins face the channel one row or
			     * column below/left of the block. */
			    if(FALSE == pos_dir)
				{
				    --chan;
				}

			    /* Skip the location if there is no channel. */
			    if(chan < 0)
				{
				    continue;
				}
			    if(seg < 1)
				{
				    continue;
				}
			    if(seg > (vert ? nx : ny))
				{
				    continue;
				}
			    if(chan > (vert ? ny : nx))
				{
				    continue;
				}

			    /* Get the list of opin to mux connections for that chan seg. */
			    num_edges +=
				get_unidir_opin_connections(chan, seg,
							    Fc_out[type->
								   index],
							    chan_type,
							    seg_details,
							    &edge_list,
							    Fc_ofs,
							    rr_edge_done,
							    max_len,
							    nodes_per_chan,
							    rr_node_indices,
							    &clipped);
			    if(clipped)
				{
				    *Fc_clipped = TRUE;
				}
			}
		}

	    /* Add the edges, then free the (consumed) scratch list. */
	    inode = get_rr_node_index(i, j, OPIN, ipin, rr_node_indices);
	    alloc_and_load_edges_and_switches(rr_node, inode, num_edges,
					      rr_edge_done, edge_list);
	    while(edge_list != NULL) {
		next = edge_list->next;
		free(edge_list);
		edge_list = next;
	    }
	}
}
#if 0
static void
load_uniform_opin_switch_pattern_paired(INP int *Fc_out,
					INP int num_pins,
					INP int *pins_in_chan_seg,
					INP int num_wire_inc_muxes,
					INP int num_wire_dec_muxes,
					INP int *wire_inc_muxes,
					INP int *wire_dec_muxes,
					INOUTP t_rr_node * rr_node,
					INOUTP boolean * rr_edge_done,
					INP t_seg_details * seg_details,
					OUTP boolean * Fc_clipped)
{

    /* Directionality is assumed to be uni-directional */

    /* Make turn-based assignment to avoid overlap when Fc_output is low. This is a bipartite
     * matching problem. Out of "num_wire_muxes" muxes "Fc_output" of them is assigned
     * to each outpin (total "num_pins" of them); assignment is uniform (spacing - spreadout)
     * and staggered to avoid overlap when Fc_output is low.
*/ 02495 02496 /* The natural order wires muxes are stored in wire_muxes is alternating in directionality 02497 * already (by my implementation), so no need to do anything extra to avoid directional bias */ 02498 02499 /* TODO: Due to spacing, it's possible to have directional bias: all Fc_out wires connected 02500 * to one opin goes in either INC or DEC -> whereas I want a mix of both. 02501 * SOLUTION: Use quantization of 2 to ensure that if an opin connects to one wire, it 02502 * must also connect to its companion wire, which runs in the opposite direction. This 02503 * means instead of having num_wire_muxes as the matching set, pick out the INC wires 02504 * in num_wires_muxes as the matching set (the DEC wires are their companions) April 17, 2007 02505 * NEWS: That solution does not work, as treating wires in groups will lead to serious 02506 * abnormal patterns (conns crossing multiple blocks) for W nonquantized to multiples of 2L. 02507 * So, I'm chaning that approach to a new one that avoids directional bias: I will separate 02508 * the INC muxes and DEC muxes into two sets. Each set is uniformly assigned to opins with 02509 * Fc_output/2; this should be identical as before for normal cases and contains all conns 02510 * in the same chan segment for the nonquantized cases. */ 02511 02512 /* Finally, separated the two approaches: 1. Take all wire muxes and assign them to opins 02513 * one at a time (load_uniform_opin_switch_pattern) 2. Take pairs (by companion) 02514 * of wire muxes and assign them to opins a pair at a time (load_uniform_opin_switch_pattern_paired). 
02515 * The first is used for fringe channel segments (ends of channels, where 02516 * there are lots of muxes due to partial wire segments) and the second is used in core */ 02517 02518 /* float spacing, step_size, f_mux; */ 02519 int ipin, iconn, num_edges, init_mux; 02520 int from_node, to_node, to_track; 02521 int xlow, ylow; 02522 t_linked_edge *edge_list; 02523 int *wire_muxes; 02524 int k, num_wire_muxes, Fc_output_per_side, CurFc; 02525 int count_inc, count_dec; 02526 t_type_ptr type; 02527 02528 *Fc_clipped = FALSE; 02529 02530 count_inc = count_dec = 0; 02531 02532 for(ipin = 0; ipin < num_pins; ipin++) 02533 { 02534 from_node = pins_in_chan_seg[ipin]; 02535 xlow = rr_node[from_node].xlow; 02536 ylow = rr_node[from_node].ylow; 02537 type = grid[xlow][ylow].type; 02538 edge_list = NULL; 02539 num_edges = 0; 02540 02541 /* Assigning the INC muxes first, then DEC muxes */ 02542 for(k = 0; k < 2; ++k) 02543 { 02544 if(k == 0) 02545 { 02546 num_wire_muxes = num_wire_inc_muxes; 02547 wire_muxes = wire_inc_muxes; 02548 } 02549 else 02550 { 02551 num_wire_muxes = num_wire_dec_muxes; 02552 wire_muxes = wire_dec_muxes; 02553 } 02554 02555 /* Half the Fc will be assigned for each direction. */ 02556 assert(Fc_out[type->index] % 2 == 0); 02557 Fc_output_per_side = Fc_out[type->index] / 2; 02558 02559 /* Clip the demand. Make sure to use a new variable so 02560 * on the second pass it is not clipped. 
*/ 02561 CurFc = Fc_output_per_side; 02562 if(Fc_output_per_side > num_wire_muxes) 02563 { 02564 *Fc_clipped = TRUE; 02565 CurFc = num_wire_muxes; 02566 } 02567 02568 if(k == 0) 02569 { 02570 init_mux = (count_inc) % num_wire_muxes; 02571 count_inc += CurFc; 02572 } 02573 else 02574 { 02575 init_mux = (count_dec) % num_wire_muxes; 02576 count_dec += CurFc; 02577 } 02578 02579 for(iconn = 0; iconn < CurFc; iconn++) 02580 { 02581 /* FINALLY, make the outpin to mux connection */ 02582 /* Latest update: I'm not using Uniform Pattern, but a similarly staggered pattern */ 02583 to_node = 02584 wire_muxes[(init_mux + 02585 iconn) % num_wire_muxes]; 02586 02587 rr_node[to_node].num_opin_drivers++; /* keep track of mux size */ 02588 to_track = rr_node[to_node].ptc_num; 02589 02590 if(FALSE == rr_edge_done[to_node]) 02591 { 02592 /* Use of alloc_and_load_edges_and_switches 02593 * must be accompanied by rr_edge_done check. */ 02594 rr_edge_done[to_node] = TRUE; 02595 edge_list = 02596 insert_in_edge_list(edge_list, 02597 to_node, 02598 seg_details 02599 [to_track]. 02600 wire_switch); 02601 num_edges++; 02602 } 02603 } 02604 } 02605 02606 if(num_edges < 1) 02607 { 02608 printf 02609 ("Error: opin %d at (%d,%d) does not connect to any " 02610 "tracks.\n", rr_node[from_node].ptc_num, 02611 rr_node[from_node].xlow, rr_node[from_node].ylow); 02612 exit(1); 02613 } 02614 02615 alloc_and_load_edges_and_switches(rr_node, from_node, num_edges, 02616 rr_edge_done, edge_list); 02617 } 02618 } 02619 #endif 02620 02621 #if MUX_SIZE_DIST_DISPLAY 02622 /** This routine prints and dumps statistics on the mux sizes on a sblock 02623 * per sblock basis, over the entire chip. Mux sizes should be balanced (off by 02624 * at most 1) for all muxes in the same sblock in the core, and corner sblocks. 02625 * Fringe sblocks will have imbalance due to missing one side and constrains on 02626 * where wires must connect. 
 Comparing two core sblock sblocks, muxes need not
 * be balanced if W is not quantized to 2L multiples, again for reasons that
 * there could be sblocks with different number of muxes but same number of incoming
 * wires that need to make connections to these muxes (we don't want to under-connect
 * user-specified Fc and Fs).
 */
static void
view_mux_size_distribution(t_ivec *** rr_node_indices,
			   int nodes_per_chan,
			   t_seg_details * seg_details_x,
			   t_seg_details * seg_details_y)
{

    int i, j, itrack, seg_num, chan_num, max_len;
    int start, end, inode, max_value, min_value;
    int array_count, k, num_muxes;
    short direction, side;
    float *percent_range_array;
    float percent_range, percent_range_sum, avg_percent_range;
    float std_dev_percent_range, deviation_f;
    int range, *range_array, global_max_range;
    float avg_range, range_sum, std_dev_range;
    t_seg_details *seg_details;
    /* Per-sblock list of mux sizes, kept sorted in descending order. */
    t_mux *new_mux, *sblock_mux_list_head, *current, *next;

#ifdef ENABLE_DUMP
    FILE *dump_file_per_sblock, *dump_file;
#endif /* ENABLE_DUMP */
    /* One distribution per distinct sblock mux count (sblock "type"). */
    t_mux_size_distribution *distr_list, *distr_current, *new_distribution,
	*distr_next;

#ifdef ENABLE_DUMP
    dump_file = my_fopen("mux_size_dump.txt", "w", 0);
    dump_file_per_sblock = my_fopen("mux_size_per_sblock_dump.txt", "w", 0);
#endif /* ENABLE_DUMP */

    sblock_mux_list_head = NULL;
    /* One statistics slot for each core sblock: (nx-1) x (ny-1) of them. */
    percent_range_array =
	(float *)my_malloc((nx - 1) * (ny - 1) * sizeof(float));
    range_array = (int *)my_malloc((nx - 1) * (ny - 1) * sizeof(int));
    array_count = 0;
    percent_range_sum = 0.0;
    range_sum = 0.0;
    global_max_range = 0;
    min_value = 0;
    max_value = 0;
    seg_num = 0;
    chan_num = 0;
    direction = 0;
    seg_details = 0;
    max_len = 0;
    distr_list = NULL;

    /* With the specified range, I'm only looking at core sblocks */
    for(j = (ny - 1); j > 0; j--)
	{
	    for(i = 1; i < nx; i++)
		{
		    num_muxes = 0;
		    for(side = 0; side < 4; side++)
			{
			    /* Map this sblock side to the channel segment whose
			     * muxes live at the sblock, and the wire direction
			     * that is actually multiplexed there. */
			    switch (side)
				{
				case LEFT:
				    seg_num = i;
				    chan_num = j;
				    direction = DEC_DIRECTION;	/* only DEC have muxes in that sblock */
				    seg_details = seg_details_x;
				    max_len = nx;
				    break;

				case RIGHT:
				    seg_num = i + 1;
				    chan_num = j;
				    direction = INC_DIRECTION;
				    seg_details = seg_details_x;
				    max_len = nx;
				    break;

				case TOP:
				    seg_num = j + 1;
				    chan_num = i;
				    direction = INC_DIRECTION;
				    seg_details = seg_details_y;
				    max_len = ny;
				    break;

				case BOTTOM:
				    seg_num = j;
				    chan_num = i;
				    direction = DEC_DIRECTION;
				    seg_details = seg_details_y;
				    max_len = ny;
				    break;

				default:
				    assert(FALSE);
				}

			    assert(nodes_per_chan > 0);
			    for(itrack = 0; itrack < nodes_per_chan; itrack++)
				{
				    start =
					get_seg_start(seg_details, itrack,
						      seg_num, chan_num);
				    end =
					get_seg_end(seg_details, itrack,
						    start, chan_num, max_len);

				    /* A wire starting (INC) or ending (DEC) here
				     * is driven by a mux at this sblock. */
				    if((seg_details[itrack].direction ==
					direction) && (((start == seg_num)
							&& (direction ==
							    INC_DIRECTION))
						       || ((end == seg_num)
							   && (direction ==
							       DEC_DIRECTION))))
					{	/* mux found */
					    num_muxes++;
					    if(side == LEFT || side == RIGHT)
						{	/* CHANX */
						    inode =
							get_rr_node_index
							(seg_num, chan_num,
							 CHANX, itrack,
							 rr_node_indices);
						}
					    else
						{
						    assert((side == TOP) || (side == BOTTOM));	/* CHANY */
						    inode =
							get_rr_node_index
							(chan_num, seg_num,
							 CHANY, itrack,
							 rr_node_indices);
						}

					    new_mux = (t_mux *)
						my_malloc(sizeof(t_mux));
					    new_mux->size =
						rr_node[inode].
						num_wire_drivers +
						rr_node[inode].
						num_opin_drivers;
					    new_mux->next = NULL;

					    /* insert in linked list, descending */
					    if(sblock_mux_list_head == NULL)
						{
						    /* first entry */
						    sblock_mux_list_head =
							new_mux;
						}
					    else if(sblock_mux_list_head->
						    size < new_mux->size)
						{
						    /* insert before head */
						    new_mux->next =
							sblock_mux_list_head;
						    sblock_mux_list_head =
							new_mux;
						}
					    else
						{
						    /* insert after head */
						    current =
							sblock_mux_list_head;
						    next = current->next;

						    while((next != NULL)
							  && (next->size >
							      new_mux->size))
							{
							    current = next;
							    next =
								current->next;
							}

						    if(next == NULL)
							{
							    current->next =
								new_mux;
							}
						    else
							{
							    new_mux->next =
								current->next;
							    current->next =
								new_mux;
							}
						}
					    /* end of insert in linked list */
					}
				}
			}	/* end of mux searching over all four sides of sblock */
		    /* now sblock_mux_list_head holds a linked list of all muxes in this sblock */

		    current = sblock_mux_list_head;

#ifdef ENABLE_DUMP
		    fprintf(dump_file_per_sblock,
			    "sblock at (%d, %d) has mux sizes: {", i, j);
#endif /* ENABLE_DUMP */

		    if(current != NULL)
			{
			    max_value = min_value = current->size;
			}
		    while(current != NULL)
			{
			    if(max_value < current->size)
				max_value = current->size;
			    if(min_value > current->size)
				min_value = current->size;

#ifdef ENABLE_DUMP
			    fprintf(dump_file_per_sblock, "%d ",
				    current->size);
			    fprintf(dump_file, "%d\n", current->size);
#endif /* ENABLE_DUMP */

			    current = current->next;
			}

#ifdef ENABLE_DUMP
		    fprintf(dump_file_per_sblock, "}\n\tmax: %d\tmin:%d",
			    max_value, min_value);
#endif /* ENABLE_DUMP */

		    range = max_value - min_value;
		    /* NOTE(review): if this sblock has no muxes, min_value is
		     * still 0 here and this divides by zero (inf on IEEE
		     * targets) -- debug-only code, but worth confirming. */
		    percent_range = ((float)range) / ((float)min_value);

		    if(global_max_range < range)
			global_max_range = range;

#ifdef ENABLE_DUMP
		    fprintf(dump_file_per_sblock,
			    "\t\trange: %d\t\tpercent range:%.2f\n",
			    range, percent_range);
#endif /* ENABLE_DUMP */

		    percent_range_array[array_count] = percent_range;
		    range_array[array_count] = range;

		    percent_range_sum += percent_range;
		    range_sum += range;

		    array_count++;

		    /* I will use a distribution for each (core) sblock type.
		     * There are more than 1 type of sblocks,
		     * when quantization of W to 2L multiples is not observed. */

		    /* Find the distribution matching this sblock's mux count. */
		    distr_current = distr_list;
		    while(distr_current != NULL
			  && distr_current->mux_count != num_muxes)
			{
			    distr_current = distr_current->next;
			}

		    if(distr_current == NULL)
			{
			    /* Create a distribution for the new sblock type,
			     * and put it as head of linked list by convention */

			    new_distribution = (t_mux_size_distribution *)
				my_malloc(sizeof(t_mux_size_distribution));
			    new_distribution->mux_count = num_muxes;
			    new_distribution->max_index = max_value;
			    new_distribution->distr =
				(int *)my_calloc(max_value + 1, sizeof(int));

			    /* filling in the distribution */
			    current = sblock_mux_list_head;
			    while(current != NULL)
				{
				    assert(current->size <=
					   new_distribution->max_index);
				    new_distribution->distr[current->size]++;
				    current = current->next;
				}

			    /* add it to head */
			    new_distribution->next = distr_list;
			    distr_list = new_distribution;
			}
		    else
			{
			    /* distr_current->mux_count == num_muxes so add this sblock's mux sizes in this distribution */
			    current = sblock_mux_list_head;

			    while(current != NULL)
				{
				    if(current->size >
				       distr_current->max_index)
					{
					    /* needs to realloc to expand the distribution array to hold the new large-valued data */
					    distr_current->distr =
						my_realloc(distr_current->
							   distr,
							   (current->size +
							    1) * sizeof(int));

					    /* initializing the newly allocated elements */
					    for(k =
						(distr_current->max_index +
						 1); k <= current->size; k++)
						distr_current->distr[k] = 0;

					    distr_current->max_index =
						current->size;
					    distr_current->distr[current->
								 size]++;
					}
				    else
					{
					    distr_current->distr[current->
								 size]++;
					}
				    current = current->next;
				}
			}

		    /* done - now free memory */
		    current = sblock_mux_list_head;
		    while(current != NULL)
			{
			    next = current->next;
			    free(current);
			    current = next;
			}
		    sblock_mux_list_head = NULL;
		}
	}

    avg_percent_range = (float)percent_range_sum / array_count;
    avg_range = (float)range_sum / array_count;

    /* Second pass over the recorded per-sblock stats: sample std deviation.
     * NOTE(review): array_count == 1 would divide by zero below -- only
     * possible on degenerate grids; confirm if that can occur. */
    percent_range_sum = 0.0;
    range_sum = 0.0;
    for(k = 0; k < array_count; k++)
	{
	    deviation_f = (percent_range_array[k] - avg_percent_range);
	    percent_range_sum += deviation_f * deviation_f;

	    deviation_f = ((float)range_array[k] - avg_range);
	    range_sum += deviation_f * deviation_f;
	}
    std_dev_percent_range =
	sqrt(percent_range_sum / ((float)array_count - 1.0));
    std_dev_range = sqrt(range_sum / ((float)array_count - 1.0));
    printf("==== MUX size statistics ====\n");
    printf("max range of mux size within a sblock is; %d\n",
	   global_max_range);
    printf("average range of mux size within a sblock is; %.2f\n", avg_range);
    printf("std dev of range of mux size within a sblock is; %.2f\n",
	   std_dev_range);
    printf
	("average percent range of mux size within a sblock is; %.2f%%\n",
	 avg_percent_range * 100.0);
    printf
	("std dev of percent range of mux size within a sblock is; %.2f%%\n",
	 std_dev_percent_range * 100.0);

    printf(" -- Detailed MUX size distribution by sblock type -- \n");
    distr_current = distr_list;
    while(distr_current != NULL)
	{
	    print_distribution(stdout, distr_current);

	    /* free */
	    distr_next = distr_current->next;

	    free(distr_current->distr);
	    free(distr_current);

	    distr_current = distr_next;
	}

    free(percent_range_array);
    free(range_array);
#ifdef ENABLE_DUMP
    fclose(dump_file_per_sblock);
    fclose(dump_file);
#endif /* ENABLE_DUMP */
}

/** Prints one mux-size distribution (frequency table indexed by mux size)
 * to fptr, skipping the leading run of empty size buckets. */
static void
print_distribution(FILE * fptr,
		   t_mux_size_distribution * distr_struct)
{
    int *distr;
    int k;
    float sum;
    boolean zeros;

    distr = distr_struct->distr;
    fprintf(fptr,
	    "For Sblocks containing %d MUXes, the MUX size distribution is:\n",
	    distr_struct->mux_count);
    fprintf(fptr, "\t\t\tSize\t\t\tFrequency (percent)\n");

    /* Total sample count, used to express each bucket as a percentage. */
    sum = 0.0;
    for(k = 0; k <= distr_struct->max_index; k++)
	sum += distr[k];

    zeros = TRUE;
    for(k = 0; k <= distr_struct->max_index; k++)
	{
	    if(zeros && (distr[k] == 0))
		{
		    /* do nothing for leading string of zeros */
		}
	    else
		{
		    zeros = FALSE;	/* leading string of zeros ended */
		    fprintf(fptr, "\t\t\t%d\t\t\t%d (%.2f%%)\n", k, distr[k],
			    (float)distr[k] / sum * 100.0);
		}
	}
    fprintf(fptr, "\nEnd of this Sblock MUX size distribution.\n");

}
#endif