VPR-6.0
|
00001 #include <math.h> /* Needed only for sqrt call (remove if sqrt removed) */ 00002 #include "util.h" 00003 #include "vpr_types.h" 00004 #include "globals.h" 00005 #include "rr_graph_util.h" 00006 #include "rr_graph2.h" 00007 #include "rr_graph_indexed_data.h" 00008 #include "read_xml_arch_file.h" 00009 00010 /******************* Subroutines local to this module ************************/ 00011 00012 static void load_rr_indexed_data_base_costs(int nodes_per_chan, 00013 t_ivec *** rr_node_indices, 00014 enum e_base_cost_type 00015 base_cost_type, 00016 int wire_to_ipin_switch); 00017 00018 static float get_delay_normalization_fac(int nodes_per_chan, 00019 t_ivec *** rr_node_indices); 00020 00021 static float get_average_opin_delay(t_ivec *** rr_node_indices, 00022 int nodes_per_chan); 00023 00024 static void load_rr_indexed_data_T_values(int index_start, 00025 int num_indices_to_load, 00026 t_rr_type rr_type, 00027 int nodes_per_chan, 00028 t_ivec *** rr_node_indices, 00029 t_segment_inf * segment_inf); 00030 00031 00032 00033 /******************** Subroutine definitions *********************************/ 00034 00035 /** Allocates the rr_indexed_data array and loads it with appropriate values. 00036 * It currently stores the segment type (or OPEN if the index doesn't 00037 * correspond to an CHANX or CHANY type), the base cost of nodes of that 00038 * type, and some info to allow rapid estimates of time to get to a target 00039 * to be computed by the router. 00040 * 00041 * Right now all SOURCES have the same base cost; and similarly there's only 00042 * one base cost for each of SINKs, OPINs, and IPINs (four total). This can 00043 * be changed just by allocating more space in the array below and changing 00044 * the cost_index values for these rr_nodes, if you want to make some pins 00045 * etc. more expensive than others. I give each segment type in an 00046 * x-channel its own cost_index, and each segment type in a y-channel its 00047 * own cost_index. 00048 */ 00049 void 00050 alloc_and_load_rr_indexed_data(INP t_segment_inf * segment_inf, 00051 INP int num_segment, 00052 INP t_ivec *** rr_node_indices, 00053 INP int nodes_per_chan, 00054 int wire_to_ipin_switch, 00055 enum e_base_cost_type base_cost_type) 00056 { 00057 00058 int iseg, length, i, index; 00059 00060 num_rr_indexed_data = CHANX_COST_INDEX_START + (2 * num_segment); 00061 rr_indexed_data = (t_rr_indexed_data *) my_malloc(num_rr_indexed_data * 00062 sizeof 00063 (t_rr_indexed_data)); 00064 00065 /* For rr_types that aren't CHANX or CHANY, base_cost is valid, but most * 00066 * * other fields are invalid. For IPINs, the T_linear field is also valid; * 00067 * * all other fields are invalid. For SOURCES, SINKs and OPINs, all fields * 00068 * * other than base_cost are invalid. Mark invalid fields as OPEN for safety. */ 00069 00070 for(i = SOURCE_COST_INDEX; i <= IPIN_COST_INDEX; i++) 00071 { 00072 rr_indexed_data[i].ortho_cost_index = OPEN; 00073 rr_indexed_data[i].seg_index = OPEN; 00074 rr_indexed_data[i].inv_length = OPEN; 00075 rr_indexed_data[i].T_linear = OPEN; 00076 rr_indexed_data[i].T_quadratic = OPEN; 00077 rr_indexed_data[i].C_load = OPEN; 00078 } 00079 00080 rr_indexed_data[IPIN_COST_INDEX].T_linear = 00081 switch_inf[wire_to_ipin_switch].Tdel; 00082 00083 /* X-directed segments. */ 00084 00085 for(iseg = 0; iseg < num_segment; iseg++) 00086 { 00087 index = CHANX_COST_INDEX_START + iseg; 00088 00089 rr_indexed_data[index].ortho_cost_index = index + num_segment; 00090 00091 if(segment_inf[iseg].longline) 00092 length = nx; 00093 else 00094 length = min(segment_inf[iseg].length, nx); 00095 00096 00097 rr_indexed_data[index].inv_length = 1. / length; 00098 rr_indexed_data[index].seg_index = iseg; 00099 } 00100 00101 load_rr_indexed_data_T_values(CHANX_COST_INDEX_START, num_segment, 00102 CHANX, nodes_per_chan, rr_node_indices, 00103 segment_inf); 00104 00105 /* Y-directed segments. */ 00106 00107 for(iseg = 0; iseg < num_segment; iseg++) 00108 { 00109 index = CHANX_COST_INDEX_START + num_segment + iseg; 00110 00111 rr_indexed_data[index].ortho_cost_index = index - num_segment; 00112 00113 if(segment_inf[iseg].longline) 00114 length = ny; 00115 else 00116 length = min(segment_inf[iseg].length, ny); 00117 00118 rr_indexed_data[index].inv_length = 1. / length; 00119 rr_indexed_data[index].seg_index = iseg; 00120 } 00121 00122 load_rr_indexed_data_T_values((CHANX_COST_INDEX_START + num_segment), 00123 num_segment, CHANY, nodes_per_chan, 00124 rr_node_indices, segment_inf); 00125 00126 load_rr_indexed_data_base_costs(nodes_per_chan, rr_node_indices, 00127 base_cost_type, wire_to_ipin_switch); 00128 00129 } 00130 00131 /** Loads the base_cost member of rr_indexed_data according to the specified 00132 * base_cost_type. 00133 */ 00134 static void 00135 load_rr_indexed_data_base_costs(int nodes_per_chan, 00136 t_ivec *** rr_node_indices, 00137 enum e_base_cost_type base_cost_type, 00138 int wire_to_ipin_switch) 00139 { 00140 00141 float delay_normalization_fac; 00142 int index; 00143 00144 if(base_cost_type == DELAY_NORMALIZED) 00145 { 00146 delay_normalization_fac = 00147 get_delay_normalization_fac(nodes_per_chan, rr_node_indices); 00148 } 00149 else 00150 { 00151 delay_normalization_fac = 1.; 00152 } 00153 00154 if(base_cost_type == DEMAND_ONLY || base_cost_type == DELAY_NORMALIZED) 00155 { 00156 rr_indexed_data[SOURCE_COST_INDEX].base_cost = 00157 delay_normalization_fac; 00158 rr_indexed_data[SINK_COST_INDEX].base_cost = 0.; 00159 rr_indexed_data[OPIN_COST_INDEX].base_cost = 00160 delay_normalization_fac; 00161 00162 #ifndef SPEC 00163 rr_indexed_data[IPIN_COST_INDEX].base_cost = 00164 0.95 * delay_normalization_fac; 00165 #else /* Avoid roundoff for SPEC */ 00166 rr_indexed_data[IPIN_COST_INDEX].base_cost = 00167 delay_normalization_fac; 00168 #endif 00169 } 00170 00171 else if(base_cost_type == INTRINSIC_DELAY) 00172 { 00173 rr_indexed_data[SOURCE_COST_INDEX].base_cost = 0.; 00174 rr_indexed_data[SINK_COST_INDEX].base_cost = 0.; 00175 rr_indexed_data[OPIN_COST_INDEX].base_cost = 00176 get_average_opin_delay(rr_node_indices, nodes_per_chan); 00177 rr_indexed_data[IPIN_COST_INDEX].base_cost = 00178 switch_inf[wire_to_ipin_switch].Tdel; 00179 } 00180 00181 /* Load base costs for CHANX and CHANY segments */ 00182 00183 for(index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++) 00184 { 00185 if(base_cost_type == INTRINSIC_DELAY) 00186 rr_indexed_data[index].base_cost = 00187 rr_indexed_data[index].T_linear + 00188 rr_indexed_data[index].T_quadratic; 00189 else 00190 /* rr_indexed_data[index].base_cost = delay_normalization_fac / 00191 rr_indexed_data[index].inv_length; */ 00192 00193 rr_indexed_data[index].base_cost = delay_normalization_fac; 00194 /* rr_indexed_data[index].base_cost = delay_normalization_fac * 00195 sqrt (1. / rr_indexed_data[index].inv_length); */ 00196 /* rr_indexed_data[index].base_cost = delay_normalization_fac * 00197 (1. + 1. / rr_indexed_data[index].inv_length); */ 00198 } 00199 00200 /* Save a copy of the base costs -- if dynamic costing is used by the * 00201 * router, the base_cost values will get changed all the time and being * 00202 * able to restore them from a saved version is useful. */ 00203 00204 for(index = 0; index < num_rr_indexed_data; index++) 00205 { 00206 rr_indexed_data[index].saved_base_cost = 00207 rr_indexed_data[index].base_cost; 00208 } 00209 } 00210 00211 00212 /** Returns the average delay to go 1 CLB distance along a wire. */ 00213 static float 00214 get_delay_normalization_fac(int nodes_per_chan, 00215 t_ivec *** rr_node_indices) 00216 { 00217 const int clb_dist = 3; /* Number of CLBs I think the average conn. goes. */ 00218 00219 int inode, itrack, cost_index; 00220 float Tdel, Tdel_sum, frac_num_seg; 00221 00222 Tdel_sum = 0.; 00223 00224 for(itrack = 0; itrack < nodes_per_chan; itrack++) 00225 { 00226 inode = 00227 get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, CHANX, itrack, 00228 rr_node_indices); 00229 cost_index = rr_node[inode].cost_index; 00230 frac_num_seg = clb_dist * rr_indexed_data[cost_index].inv_length; 00231 Tdel = frac_num_seg * rr_indexed_data[cost_index].T_linear + 00232 frac_num_seg * frac_num_seg * 00233 rr_indexed_data[cost_index].T_quadratic; 00234 Tdel_sum += Tdel / (float)clb_dist; 00235 } 00236 00237 for(itrack = 0; itrack < nodes_per_chan; itrack++) 00238 { 00239 inode = 00240 get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, CHANY, itrack, 00241 rr_node_indices); 00242 cost_index = rr_node[inode].cost_index; 00243 frac_num_seg = clb_dist * rr_indexed_data[cost_index].inv_length; 00244 Tdel = frac_num_seg * rr_indexed_data[cost_index].T_linear + 00245 frac_num_seg * frac_num_seg * 00246 rr_indexed_data[cost_index].T_quadratic; 00247 Tdel_sum += Tdel / (float)clb_dist; 00248 } 00249 00250 return (Tdel_sum / (2. * nodes_per_chan)); 00251 } 00252 00253 00254 /** Returns the average delay from an OPIN to a wire in an adjacent channel. */ 00255 static float 00256 get_average_opin_delay(t_ivec *** rr_node_indices, 00257 int nodes_per_chan) 00258 { 00259 00260 /* RESEARCH TODO: Got to think if this heuristic needs to change for hetero, right now, I'll calculate 00261 * the average delay of non-IO blocks */ 00262 int inode, ipin, iclass, iedge, itype, num_edges, to_switch, to_node, 00263 num_conn; 00264 float Cload, Tdel; 00265 00266 Tdel = 0.; 00267 num_conn = 0; 00268 for(itype = 0; 00269 itype < num_types && &type_descriptors[itype] != IO_TYPE; itype++) 00270 { 00271 for(ipin = 0; ipin < type_descriptors[itype].num_pins; ipin++) 00272 { 00273 iclass = type_descriptors[itype].pin_class[ipin]; 00274 if(type_descriptors[itype].class_inf[iclass].type == 00275 DRIVER) 00276 { /* OPIN */ 00277 inode = 00278 get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, 00279 OPIN, ipin, 00280 rr_node_indices); 00281 num_edges = rr_node[inode].num_edges; 00282 00283 for(iedge = 0; iedge < num_edges; iedge++) 00284 { 00285 to_node = rr_node[inode].edges[iedge]; 00286 to_switch = 00287 rr_node[inode].switches[iedge]; 00288 Cload = rr_node[to_node].C; 00289 Tdel += 00290 Cload * switch_inf[to_switch].R + 00291 switch_inf[to_switch].Tdel; 00292 num_conn++; 00293 } 00294 } 00295 } 00296 } 00297 00298 Tdel /= (float)num_conn; 00299 return (Tdel); 00300 } 00301 00302 /** Loads the average propagation times through segments of each index type 00303 * for either all CHANX segment types or all CHANY segment types. It does 00304 * this by looking at all the segments in one channel in the middle of the 00305 * array and averaging the R and C values of all segments of the same type 00306 * and using them to compute average delay values for this type of segment. 00307 */ 00308 static void 00309 load_rr_indexed_data_T_values(int index_start, 00310 int num_indices_to_load, 00311 t_rr_type rr_type, 00312 int nodes_per_chan, 00313 t_ivec *** rr_node_indices, 00314 t_segment_inf * segment_inf) 00315 { 00316 00317 int itrack, iseg, inode, cost_index, iswitch; 00318 float *C_total, *R_total; /* [0..num_rr_indexed_data - 1] */ 00319 int *num_nodes_of_index; /* [0..num_rr_indexed_data - 1] */ 00320 float Rnode, Cnode, Rsw, Tsw; 00321 00322 num_nodes_of_index = (int *)my_calloc(num_rr_indexed_data, sizeof(int)); 00323 C_total = (float *)my_calloc(num_rr_indexed_data, sizeof(float)); 00324 R_total = (float *)my_calloc(num_rr_indexed_data, sizeof(float)); 00325 00326 /* Get average C and R values for all the segments of this type in one * 00327 * channel segment, near the middle of the array. */ 00328 00329 for(itrack = 0; itrack < nodes_per_chan; itrack++) 00330 { 00331 inode = 00332 get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, rr_type, itrack, 00333 rr_node_indices); 00334 cost_index = rr_node[inode].cost_index; 00335 num_nodes_of_index[cost_index]++; 00336 C_total[cost_index] += rr_node[inode].C; 00337 R_total[cost_index] += rr_node[inode].R; 00338 } 00339 00340 00341 for(cost_index = index_start; 00342 cost_index < index_start + num_indices_to_load; cost_index++) 00343 { 00344 00345 if(num_nodes_of_index[cost_index] == 0) 00346 { /* Segments don't exist. */ 00347 rr_indexed_data[cost_index].T_linear = OPEN; 00348 rr_indexed_data[cost_index].T_quadratic = OPEN; 00349 rr_indexed_data[cost_index].C_load = OPEN; 00350 } 00351 else 00352 { 00353 Rnode = 00354 R_total[cost_index] / num_nodes_of_index[cost_index]; 00355 Cnode = 00356 C_total[cost_index] / num_nodes_of_index[cost_index]; 00357 iseg = rr_indexed_data[cost_index].seg_index; 00358 iswitch = segment_inf[iseg].wire_switch; 00359 Rsw = switch_inf[iswitch].R; 00360 Tsw = switch_inf[iswitch].Tdel; 00361 00362 if(switch_inf[iswitch].buffered) 00363 { 00364 rr_indexed_data[cost_index].T_linear = 00365 Tsw + Rsw * Cnode + 0.5 * Rnode * Cnode; 00366 rr_indexed_data[cost_index].T_quadratic = 0.; 00367 rr_indexed_data[cost_index].C_load = 0.; 00368 } 00369 else 00370 { /* Pass transistor */ 00371 rr_indexed_data[cost_index].C_load = Cnode; 00372 00373 /* See Dec. 23, 1997 notes for deriviation of formulae. */ 00374 00375 rr_indexed_data[cost_index].T_linear = 00376 Tsw + 0.5 * Rsw * Cnode; 00377 rr_indexed_data[cost_index].T_quadratic = 00378 (Rsw + Rnode) * 0.5 * Cnode; 00379 } 00380 } 00381 } 00382 00383 free(num_nodes_of_index); 00384 free(C_total); 00385 free(R_total); 00386 }