VPR-6.0

vpr/SRC/route/rr_graph_indexed_data.c

Go to the documentation of this file.
00001 #include <math.h>               /* Needed only for sqrt call (remove if sqrt removed) */
00002 #include "util.h"
00003 #include "vpr_types.h"
00004 #include "globals.h"
00005 #include "rr_graph_util.h"
00006 #include "rr_graph2.h"
00007 #include "rr_graph_indexed_data.h"
00008 #include "read_xml_arch_file.h"
00009 
00010 /******************* Subroutines local to this module ************************/
00011 
00012 static void load_rr_indexed_data_base_costs(int nodes_per_chan,
00013                                             t_ivec *** rr_node_indices,
00014                                             enum e_base_cost_type
00015                                             base_cost_type,
00016                                             int wire_to_ipin_switch);
00017 
00018 static float get_delay_normalization_fac(int nodes_per_chan,
00019                                          t_ivec *** rr_node_indices);
00020 
00021 static float get_average_opin_delay(t_ivec *** rr_node_indices,
00022                                     int nodes_per_chan);
00023 
00024 static void load_rr_indexed_data_T_values(int index_start,
00025                                           int num_indices_to_load,
00026                                           t_rr_type rr_type,
00027                                           int nodes_per_chan,
00028                                           t_ivec *** rr_node_indices,
00029                                           t_segment_inf * segment_inf);
00030 
00031 
00032 
00033 /******************** Subroutine definitions *********************************/
00034 
00035 /** Allocates the rr_indexed_data array and loads it with appropriate values. 
00036  * It currently stores the segment type (or OPEN if the index doesn't        
00037  * correspond to an CHANX or CHANY type), the base cost of nodes of that     
00038  * type, and some info to allow rapid estimates of time to get to a target   
00039  * to be computed by the router.                                             
00040  *
00041  * Right now all SOURCES have the same base cost; and similarly there's only 
00042  * one base cost for each of SINKs, OPINs, and IPINs (four total).  This can 
00043  * be changed just by allocating more space in the array below and changing  
00044  * the cost_index values for these rr_nodes, if you want to make some pins   
00045  * etc. more expensive than others.  I give each segment type in an          
00046  * x-channel its own cost_index, and each segment type in a y-channel its    
00047  * own cost_index.                                                           
00048  */
00049 void
00050 alloc_and_load_rr_indexed_data(INP t_segment_inf * segment_inf,
00051                                INP int num_segment,
00052                                INP t_ivec *** rr_node_indices,
00053                                INP int nodes_per_chan,
00054                                int wire_to_ipin_switch,
00055                                enum e_base_cost_type base_cost_type)
00056 {
00057 
00058     int iseg, length, i, index;
00059 
00060     num_rr_indexed_data = CHANX_COST_INDEX_START + (2 * num_segment);
00061     rr_indexed_data = (t_rr_indexed_data *) my_malloc(num_rr_indexed_data *
00062                                                       sizeof
00063                                                       (t_rr_indexed_data));
00064 
00065     /* For rr_types that aren't CHANX or CHANY, base_cost is valid, but most     *
00066      * * other fields are invalid.  For IPINs, the T_linear field is also valid;   *
00067      * * all other fields are invalid.  For SOURCES, SINKs and OPINs, all fields   *
00068      * * other than base_cost are invalid. Mark invalid fields as OPEN for safety. */
00069 
00070     for(i = SOURCE_COST_INDEX; i <= IPIN_COST_INDEX; i++)
00071         {
00072             rr_indexed_data[i].ortho_cost_index = OPEN;
00073             rr_indexed_data[i].seg_index = OPEN;
00074             rr_indexed_data[i].inv_length = OPEN;
00075             rr_indexed_data[i].T_linear = OPEN;
00076             rr_indexed_data[i].T_quadratic = OPEN;
00077             rr_indexed_data[i].C_load = OPEN;
00078         }
00079 
00080     rr_indexed_data[IPIN_COST_INDEX].T_linear =
00081         switch_inf[wire_to_ipin_switch].Tdel;
00082 
00083     /* X-directed segments. */
00084 
00085     for(iseg = 0; iseg < num_segment; iseg++)
00086         {
00087             index = CHANX_COST_INDEX_START + iseg;
00088 
00089             rr_indexed_data[index].ortho_cost_index = index + num_segment;
00090 
00091             if(segment_inf[iseg].longline)
00092                 length = nx;
00093             else
00094                 length = min(segment_inf[iseg].length, nx);
00095 
00096 
00097             rr_indexed_data[index].inv_length = 1. / length;
00098             rr_indexed_data[index].seg_index = iseg;
00099         }
00100 
00101     load_rr_indexed_data_T_values(CHANX_COST_INDEX_START, num_segment,
00102                                   CHANX, nodes_per_chan, rr_node_indices,
00103                                   segment_inf);
00104 
00105     /* Y-directed segments. */
00106 
00107     for(iseg = 0; iseg < num_segment; iseg++)
00108         {
00109             index = CHANX_COST_INDEX_START + num_segment + iseg;
00110 
00111             rr_indexed_data[index].ortho_cost_index = index - num_segment;
00112 
00113             if(segment_inf[iseg].longline)
00114                 length = ny;
00115             else
00116                 length = min(segment_inf[iseg].length, ny);
00117 
00118             rr_indexed_data[index].inv_length = 1. / length;
00119             rr_indexed_data[index].seg_index = iseg;
00120         }
00121 
00122     load_rr_indexed_data_T_values((CHANX_COST_INDEX_START + num_segment),
00123                                   num_segment, CHANY, nodes_per_chan,
00124                                   rr_node_indices, segment_inf);
00125 
00126     load_rr_indexed_data_base_costs(nodes_per_chan, rr_node_indices,
00127                                     base_cost_type, wire_to_ipin_switch);
00128 
00129 }
00130 
00131 /** Loads the base_cost member of rr_indexed_data according to the specified 
00132  * base_cost_type.                                                          
00133  */
00134 static void
00135 load_rr_indexed_data_base_costs(int nodes_per_chan,
00136                                 t_ivec *** rr_node_indices,
00137                                 enum e_base_cost_type base_cost_type,
00138                                 int wire_to_ipin_switch)
00139 {
00140 
00141     float delay_normalization_fac;
00142     int index;
00143 
00144     if(base_cost_type == DELAY_NORMALIZED)
00145         {
00146             delay_normalization_fac =
00147                 get_delay_normalization_fac(nodes_per_chan, rr_node_indices);
00148         }
00149     else
00150         {
00151             delay_normalization_fac = 1.;
00152         }
00153 
00154     if(base_cost_type == DEMAND_ONLY || base_cost_type == DELAY_NORMALIZED)
00155         {
00156             rr_indexed_data[SOURCE_COST_INDEX].base_cost =
00157                 delay_normalization_fac;
00158             rr_indexed_data[SINK_COST_INDEX].base_cost = 0.;
00159             rr_indexed_data[OPIN_COST_INDEX].base_cost =
00160                 delay_normalization_fac;
00161 
00162 #ifndef SPEC
00163             rr_indexed_data[IPIN_COST_INDEX].base_cost =
00164                 0.95 * delay_normalization_fac;
00165 #else /* Avoid roundoff for SPEC */
00166             rr_indexed_data[IPIN_COST_INDEX].base_cost =
00167                 delay_normalization_fac;
00168 #endif
00169         }
00170 
00171     else if(base_cost_type == INTRINSIC_DELAY)
00172         {
00173             rr_indexed_data[SOURCE_COST_INDEX].base_cost = 0.;
00174             rr_indexed_data[SINK_COST_INDEX].base_cost = 0.;
00175             rr_indexed_data[OPIN_COST_INDEX].base_cost =
00176                 get_average_opin_delay(rr_node_indices, nodes_per_chan);
00177             rr_indexed_data[IPIN_COST_INDEX].base_cost =
00178                 switch_inf[wire_to_ipin_switch].Tdel;
00179         }
00180 
00181 /* Load base costs for CHANX and CHANY segments */
00182 
00183     for(index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++)
00184         {
00185             if(base_cost_type == INTRINSIC_DELAY)
00186                 rr_indexed_data[index].base_cost =
00187                     rr_indexed_data[index].T_linear +
00188                     rr_indexed_data[index].T_quadratic;
00189             else
00190 /*       rr_indexed_data[index].base_cost = delay_normalization_fac /
00191                                     rr_indexed_data[index].inv_length;  */
00192 
00193                 rr_indexed_data[index].base_cost = delay_normalization_fac;
00194 /*       rr_indexed_data[index].base_cost = delay_normalization_fac *
00195                   sqrt (1. / rr_indexed_data[index].inv_length);  */
00196 /*       rr_indexed_data[index].base_cost = delay_normalization_fac *
00197                   (1. + 1. / rr_indexed_data[index].inv_length);  */
00198         }
00199 
00200 /* Save a copy of the base costs -- if dynamic costing is used by the     * 
00201  * router, the base_cost values will get changed all the time and being   *
00202  * able to restore them from a saved version is useful.                   */
00203 
00204     for(index = 0; index < num_rr_indexed_data; index++)
00205         {
00206             rr_indexed_data[index].saved_base_cost =
00207                 rr_indexed_data[index].base_cost;
00208         }
00209 }
00210 
00211 
00212 /** Returns the average delay to go 1 CLB distance along a wire.  */
00213 static float
00214 get_delay_normalization_fac(int nodes_per_chan,
00215                             t_ivec *** rr_node_indices)
00216 {
00217     const int clb_dist = 3;     /* Number of CLBs I think the average conn. goes. */
00218 
00219     int inode, itrack, cost_index;
00220     float Tdel, Tdel_sum, frac_num_seg;
00221 
00222     Tdel_sum = 0.;
00223 
00224     for(itrack = 0; itrack < nodes_per_chan; itrack++)
00225         {
00226             inode =
00227                 get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, CHANX, itrack,
00228                                   rr_node_indices);
00229             cost_index = rr_node[inode].cost_index;
00230             frac_num_seg = clb_dist * rr_indexed_data[cost_index].inv_length;
00231             Tdel = frac_num_seg * rr_indexed_data[cost_index].T_linear +
00232                 frac_num_seg * frac_num_seg *
00233                 rr_indexed_data[cost_index].T_quadratic;
00234             Tdel_sum += Tdel / (float)clb_dist;
00235         }
00236 
00237     for(itrack = 0; itrack < nodes_per_chan; itrack++)
00238         {
00239             inode =
00240                 get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, CHANY, itrack,
00241                                   rr_node_indices);
00242             cost_index = rr_node[inode].cost_index;
00243             frac_num_seg = clb_dist * rr_indexed_data[cost_index].inv_length;
00244             Tdel = frac_num_seg * rr_indexed_data[cost_index].T_linear +
00245                 frac_num_seg * frac_num_seg *
00246                 rr_indexed_data[cost_index].T_quadratic;
00247             Tdel_sum += Tdel / (float)clb_dist;
00248         }
00249 
00250     return (Tdel_sum / (2. * nodes_per_chan));
00251 }
00252 
00253 
00254 /** Returns the average delay from an OPIN to a wire in an adjacent channel. */
00255 static float
00256 get_average_opin_delay(t_ivec *** rr_node_indices,
00257                        int nodes_per_chan)
00258 {
00259 
00260     /* RESEARCH TODO: Got to think if this heuristic needs to change for hetero, right now, I'll calculate
00261      * the average delay of non-IO blocks */
00262     int inode, ipin, iclass, iedge, itype, num_edges, to_switch, to_node,
00263         num_conn;
00264     float Cload, Tdel;
00265 
00266     Tdel = 0.;
00267     num_conn = 0;
00268     for(itype = 0;
00269         itype < num_types && &type_descriptors[itype] != IO_TYPE; itype++)
00270         {
00271             for(ipin = 0; ipin < type_descriptors[itype].num_pins; ipin++)
00272                 {
00273                     iclass = type_descriptors[itype].pin_class[ipin];
00274                     if(type_descriptors[itype].class_inf[iclass].type ==
00275                        DRIVER)
00276                         {       /* OPIN */
00277                             inode =
00278                                 get_rr_node_index((nx + 1) / 2, (ny + 1) / 2,
00279                                                   OPIN, ipin,
00280                                                   rr_node_indices);
00281                             num_edges = rr_node[inode].num_edges;
00282 
00283                             for(iedge = 0; iedge < num_edges; iedge++)
00284                                 {
00285                                     to_node = rr_node[inode].edges[iedge];
00286                                     to_switch =
00287                                         rr_node[inode].switches[iedge];
00288                                     Cload = rr_node[to_node].C;
00289                                     Tdel +=
00290                                         Cload * switch_inf[to_switch].R +
00291                                         switch_inf[to_switch].Tdel;
00292                                     num_conn++;
00293                                 }
00294                         }
00295                 }
00296         }
00297 
00298     Tdel /= (float)num_conn;
00299     return (Tdel);
00300 }
00301 
00302 /** Loads the average propagation times through segments of each index type  
00303  * for either all CHANX segment types or all CHANY segment types.  It does  
00304  * this by looking at all the segments in one channel in the middle of the  
00305  * array and averaging the R and C values of all segments of the same type 
00306  * and using them to compute average delay values for this type of segment. 
00307  */
00308 static void
00309 load_rr_indexed_data_T_values(int index_start,
00310                               int num_indices_to_load,
00311                               t_rr_type rr_type,
00312                               int nodes_per_chan,
00313                               t_ivec *** rr_node_indices,
00314                               t_segment_inf * segment_inf)
00315 {
00316 
00317     int itrack, iseg, inode, cost_index, iswitch;
00318     float *C_total, *R_total;   /* [0..num_rr_indexed_data - 1] */
00319     int *num_nodes_of_index;    /* [0..num_rr_indexed_data - 1] */
00320     float Rnode, Cnode, Rsw, Tsw;
00321 
00322     num_nodes_of_index = (int *)my_calloc(num_rr_indexed_data, sizeof(int));
00323     C_total = (float *)my_calloc(num_rr_indexed_data, sizeof(float));
00324     R_total = (float *)my_calloc(num_rr_indexed_data, sizeof(float));
00325 
00326 /* Get average C and R values for all the segments of this type in one      *
00327  * channel segment, near the middle of the array.                           */
00328 
00329     for(itrack = 0; itrack < nodes_per_chan; itrack++)
00330         {
00331             inode =
00332                 get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, rr_type, itrack,
00333                                   rr_node_indices);
00334             cost_index = rr_node[inode].cost_index;
00335             num_nodes_of_index[cost_index]++;
00336             C_total[cost_index] += rr_node[inode].C;
00337             R_total[cost_index] += rr_node[inode].R;
00338         }
00339 
00340 
00341     for(cost_index = index_start;
00342         cost_index < index_start + num_indices_to_load; cost_index++)
00343         {
00344 
00345             if(num_nodes_of_index[cost_index] == 0)
00346                 {               /* Segments don't exist. */
00347                     rr_indexed_data[cost_index].T_linear = OPEN;
00348                     rr_indexed_data[cost_index].T_quadratic = OPEN;
00349                     rr_indexed_data[cost_index].C_load = OPEN;
00350                 }
00351             else
00352                 {
00353                     Rnode =
00354                         R_total[cost_index] / num_nodes_of_index[cost_index];
00355                     Cnode =
00356                         C_total[cost_index] / num_nodes_of_index[cost_index];
00357                     iseg = rr_indexed_data[cost_index].seg_index;
00358                     iswitch = segment_inf[iseg].wire_switch;
00359                     Rsw = switch_inf[iswitch].R;
00360                     Tsw = switch_inf[iswitch].Tdel;
00361 
00362                     if(switch_inf[iswitch].buffered)
00363                         {
00364                             rr_indexed_data[cost_index].T_linear =
00365                                 Tsw + Rsw * Cnode + 0.5 * Rnode * Cnode;
00366                             rr_indexed_data[cost_index].T_quadratic = 0.;
00367                             rr_indexed_data[cost_index].C_load = 0.;
00368                         }
00369                     else
00370                         {       /* Pass transistor */
00371                             rr_indexed_data[cost_index].C_load = Cnode;
00372 
00373                             /* See Dec. 23, 1997 notes for deriviation of formulae. */
00374 
00375                             rr_indexed_data[cost_index].T_linear =
00376                                 Tsw + 0.5 * Rsw * Cnode;
00377                             rr_indexed_data[cost_index].T_quadratic =
00378                                 (Rsw + Rnode) * 0.5 * Cnode;
00379                         }
00380                 }
00381         }
00382 
00383     free(num_nodes_of_index);
00384     free(C_total);
00385     free(R_total);
00386 }