00001 #include <math.h>
00002 #include "util.h"
00003 #include "vpr_types.h"
00004 #include "globals.h"
00005 #include "rr_graph_util.h"
00006 #include "rr_graph2.h"
00007 #include "rr_graph_indexed_data.h"
00008
00009
00010
00011
00012 static void load_rr_indexed_data_base_costs(int nodes_per_chan,
00013 t_ivec *** rr_node_indices,
00014 enum e_base_cost_type
00015 base_cost_type,
00016 int wire_to_ipin_switch);
00017
00018 static float get_delay_normalization_fac(int nodes_per_chan,
00019 t_ivec *** rr_node_indices);
00020
00021 static float get_average_opin_delay(t_ivec *** rr_node_indices,
00022 int nodes_per_chan);
00023
00024 static void load_rr_indexed_data_T_values(int index_start,
00025 int num_indices_to_load,
00026 t_rr_type rr_type,
00027 int nodes_per_chan,
00028 t_ivec *** rr_node_indices,
00029 t_segment_inf * segment_inf);
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048 void
00049 alloc_and_load_rr_indexed_data(IN t_segment_inf * segment_inf,
00050 IN int num_segment,
00051 IN t_ivec *** rr_node_indices,
00052 IN int nodes_per_chan,
00053 int wire_to_ipin_switch,
00054 enum e_base_cost_type base_cost_type)
00055 {
00056
00057 int iseg, length, i, index;
00058
00059 num_rr_indexed_data = CHANX_COST_INDEX_START + (2 * num_segment);
00060 rr_indexed_data = (t_rr_indexed_data *) my_malloc(num_rr_indexed_data *
00061 sizeof
00062 (t_rr_indexed_data));
00063
00064
00065
00066
00067
00068
00069 for(i = SOURCE_COST_INDEX; i <= IPIN_COST_INDEX; i++)
00070 {
00071 rr_indexed_data[i].ortho_cost_index = OPEN;
00072 rr_indexed_data[i].seg_index = OPEN;
00073 rr_indexed_data[i].inv_length = OPEN;
00074 rr_indexed_data[i].T_linear = OPEN;
00075 rr_indexed_data[i].T_quadratic = OPEN;
00076 rr_indexed_data[i].C_load = OPEN;
00077 }
00078
00079 rr_indexed_data[IPIN_COST_INDEX].T_linear =
00080 switch_inf[wire_to_ipin_switch].Tdel;
00081
00082
00083
00084 for(iseg = 0; iseg < num_segment; iseg++)
00085 {
00086 index = CHANX_COST_INDEX_START + iseg;
00087
00088 rr_indexed_data[index].ortho_cost_index = index + num_segment;
00089
00090 if(segment_inf[iseg].longline)
00091 length = nx;
00092 else
00093 length = min(segment_inf[iseg].length, nx);
00094
00095
00096 rr_indexed_data[index].inv_length = 1. / length;
00097 rr_indexed_data[index].seg_index = iseg;
00098 }
00099
00100 load_rr_indexed_data_T_values(CHANX_COST_INDEX_START, num_segment,
00101 CHANX, nodes_per_chan, rr_node_indices,
00102 segment_inf);
00103
00104
00105
00106 for(iseg = 0; iseg < num_segment; iseg++)
00107 {
00108 index = CHANX_COST_INDEX_START + num_segment + iseg;
00109
00110 rr_indexed_data[index].ortho_cost_index = index - num_segment;
00111
00112 if(segment_inf[iseg].longline)
00113 length = ny;
00114 else
00115 length = min(segment_inf[iseg].length, ny);
00116
00117 rr_indexed_data[index].inv_length = 1. / length;
00118 rr_indexed_data[index].seg_index = iseg;
00119 }
00120
00121 load_rr_indexed_data_T_values((CHANX_COST_INDEX_START + num_segment),
00122 num_segment, CHANY, nodes_per_chan,
00123 rr_node_indices, segment_inf);
00124
00125 load_rr_indexed_data_base_costs(nodes_per_chan, rr_node_indices,
00126 base_cost_type, wire_to_ipin_switch);
00127
00128 }
00129
00130
00131 static void
00132 load_rr_indexed_data_base_costs(int nodes_per_chan,
00133 t_ivec *** rr_node_indices,
00134 enum e_base_cost_type base_cost_type,
00135 int wire_to_ipin_switch)
00136 {
00137
00138
00139
00140
00141 float delay_normalization_fac;
00142 int index;
00143
00144 if(base_cost_type == DELAY_NORMALIZED)
00145 {
00146 delay_normalization_fac =
00147 get_delay_normalization_fac(nodes_per_chan, rr_node_indices);
00148 }
00149 else
00150 {
00151 delay_normalization_fac = 1.;
00152 }
00153
00154 if(base_cost_type == DEMAND_ONLY || base_cost_type == DELAY_NORMALIZED)
00155 {
00156 rr_indexed_data[SOURCE_COST_INDEX].base_cost =
00157 delay_normalization_fac;
00158 rr_indexed_data[SINK_COST_INDEX].base_cost = 0.;
00159 rr_indexed_data[OPIN_COST_INDEX].base_cost =
00160 delay_normalization_fac;
00161
00162 #ifndef SPEC
00163 rr_indexed_data[IPIN_COST_INDEX].base_cost =
00164 0.95 * delay_normalization_fac;
00165 #else
00166 rr_indexed_data[IPIN_COST_INDEX].base_cost =
00167 delay_normalization_fac;
00168 #endif
00169 }
00170
00171 else if(base_cost_type == INTRINSIC_DELAY)
00172 {
00173 rr_indexed_data[SOURCE_COST_INDEX].base_cost = 0.;
00174 rr_indexed_data[SINK_COST_INDEX].base_cost = 0.;
00175 rr_indexed_data[OPIN_COST_INDEX].base_cost =
00176 get_average_opin_delay(rr_node_indices, nodes_per_chan);
00177 rr_indexed_data[IPIN_COST_INDEX].base_cost =
00178 switch_inf[wire_to_ipin_switch].Tdel;
00179 }
00180
00181
00182
00183 for(index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++)
00184 {
00185 if(base_cost_type == INTRINSIC_DELAY)
00186 rr_indexed_data[index].base_cost =
00187 rr_indexed_data[index].T_linear +
00188 rr_indexed_data[index].T_quadratic;
00189 else
00190
00191
00192
00193 rr_indexed_data[index].base_cost = delay_normalization_fac;
00194
00195
00196
00197
00198 }
00199
00200
00201
00202
00203
00204 for(index = 0; index < num_rr_indexed_data; index++)
00205 {
00206 rr_indexed_data[index].saved_base_cost =
00207 rr_indexed_data[index].base_cost;
00208 }
00209 }
00210
00211
00212 static float
00213 get_delay_normalization_fac(int nodes_per_chan,
00214 t_ivec *** rr_node_indices)
00215 {
00216
00217
00218
00219 const int clb_dist = 3;
00220
00221 int inode, itrack, cost_index;
00222 float Tdel, Tdel_sum, frac_num_seg;
00223
00224 Tdel_sum = 0.;
00225
00226 for(itrack = 0; itrack < nodes_per_chan; itrack++)
00227 {
00228 inode =
00229 get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, CHANX, itrack,
00230 rr_node_indices);
00231 cost_index = rr_node[inode].cost_index;
00232 frac_num_seg = clb_dist * rr_indexed_data[cost_index].inv_length;
00233 Tdel = frac_num_seg * rr_indexed_data[cost_index].T_linear +
00234 frac_num_seg * frac_num_seg *
00235 rr_indexed_data[cost_index].T_quadratic;
00236 Tdel_sum += Tdel / (float)clb_dist;
00237 }
00238
00239 for(itrack = 0; itrack < nodes_per_chan; itrack++)
00240 {
00241 inode =
00242 get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, CHANY, itrack,
00243 rr_node_indices);
00244 cost_index = rr_node[inode].cost_index;
00245 frac_num_seg = clb_dist * rr_indexed_data[cost_index].inv_length;
00246 Tdel = frac_num_seg * rr_indexed_data[cost_index].T_linear +
00247 frac_num_seg * frac_num_seg *
00248 rr_indexed_data[cost_index].T_quadratic;
00249 Tdel_sum += Tdel / (float)clb_dist;
00250 }
00251
00252 return (Tdel_sum / (2. * nodes_per_chan));
00253 }
00254
00255
00256 static float
00257 get_average_opin_delay(t_ivec *** rr_node_indices,
00258 int nodes_per_chan)
00259 {
00260
00261
00262
00263
00264 int inode, ipin, iclass, iedge, itype, num_edges, to_switch, to_node,
00265 num_conn;
00266 float Cload, Tdel;
00267
00268 Tdel = 0.;
00269 num_conn = 0;
00270 for(itype = 0;
00271 itype < num_types && &type_descriptors[itype] != IO_TYPE; itype++)
00272 {
00273 for(ipin = 0; ipin < type_descriptors[itype].num_pins; ipin++)
00274 {
00275 iclass = type_descriptors[itype].pin_class[ipin];
00276 if(type_descriptors[itype].class_inf[iclass].type ==
00277 DRIVER)
00278 {
00279 inode =
00280 get_rr_node_index((nx + 1) / 2, (ny + 1) / 2,
00281 OPIN, ipin,
00282 rr_node_indices);
00283 num_edges = rr_node[inode].num_edges;
00284
00285 for(iedge = 0; iedge < num_edges; iedge++)
00286 {
00287 to_node = rr_node[inode].edges[iedge];
00288 to_switch =
00289 rr_node[inode].switches[iedge];
00290 Cload = rr_node[to_node].C;
00291 Tdel +=
00292 Cload * switch_inf[to_switch].R +
00293 switch_inf[to_switch].Tdel;
00294 num_conn++;
00295 }
00296 }
00297 }
00298 }
00299
00300 Tdel /= (float)num_conn;
00301 return (Tdel);
00302 }
00303
00304
00305 static void
00306 load_rr_indexed_data_T_values(int index_start,
00307 int num_indices_to_load,
00308 t_rr_type rr_type,
00309 int nodes_per_chan,
00310 t_ivec *** rr_node_indices,
00311 t_segment_inf * segment_inf)
00312 {
00313
00314
00315
00316
00317
00318
00319
00320 int itrack, iseg, inode, cost_index, iswitch;
00321 float *C_total, *R_total;
00322 int *num_nodes_of_index;
00323 float Rnode, Cnode, Rsw, Tsw;
00324
00325 num_nodes_of_index = (int *)my_calloc(num_rr_indexed_data, sizeof(int));
00326 C_total = (float *)my_calloc(num_rr_indexed_data, sizeof(float));
00327 R_total = (float *)my_calloc(num_rr_indexed_data, sizeof(float));
00328
00329
00330
00331
00332 for(itrack = 0; itrack < nodes_per_chan; itrack++)
00333 {
00334 inode =
00335 get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, rr_type, itrack,
00336 rr_node_indices);
00337 cost_index = rr_node[inode].cost_index;
00338 num_nodes_of_index[cost_index]++;
00339 C_total[cost_index] += rr_node[inode].C;
00340 R_total[cost_index] += rr_node[inode].R;
00341 }
00342
00343
00344 for(cost_index = index_start;
00345 cost_index < index_start + num_indices_to_load; cost_index++)
00346 {
00347
00348 if(num_nodes_of_index[cost_index] == 0)
00349 {
00350 rr_indexed_data[cost_index].T_linear = OPEN;
00351 rr_indexed_data[cost_index].T_quadratic = OPEN;
00352 rr_indexed_data[cost_index].C_load = OPEN;
00353 }
00354 else
00355 {
00356 Rnode =
00357 R_total[cost_index] / num_nodes_of_index[cost_index];
00358 Cnode =
00359 C_total[cost_index] / num_nodes_of_index[cost_index];
00360 iseg = rr_indexed_data[cost_index].seg_index;
00361 iswitch = segment_inf[iseg].wire_switch;
00362 Rsw = switch_inf[iswitch].R;
00363 Tsw = switch_inf[iswitch].Tdel;
00364
00365 if(switch_inf[iswitch].buffered)
00366 {
00367 rr_indexed_data[cost_index].T_linear =
00368 Tsw + Rsw * Cnode + 0.5 * Rnode * Cnode;
00369 rr_indexed_data[cost_index].T_quadratic = 0.;
00370 rr_indexed_data[cost_index].C_load = 0.;
00371 }
00372 else
00373 {
00374 rr_indexed_data[cost_index].C_load = Cnode;
00375
00376
00377
00378 rr_indexed_data[cost_index].T_linear =
00379 Tsw + 0.5 * Rsw * Cnode;
00380 rr_indexed_data[cost_index].T_quadratic =
00381 (Rsw + Rnode) * 0.5 * Cnode;
00382 }
00383 }
00384 }
00385
00386 free(num_nodes_of_index);
00387 free(C_total);
00388 free(R_total);
00389 }