SRC/rr_graph_area.c File Reference

#include <math.h>
#include "util.h"
#include "vpr_types.h"
#include <assert.h>
#include "globals.h"
#include "rr_graph_util.h"
#include "rr_graph_area.h"

Include dependency graph for rr_graph_area.c:

Go to the source code of this file.

Functions
static void	count_bidir_routing_transistors (int num_switch, float R_minW_nmos, float R_minW_pmos)
static void	count_unidir_routing_transistors (t_segment_inf *segment_inf, float R_minW_nmos, float R_minW_pmos)
static float	get_cblock_trans (int *num_inputs_to_cblock, int max_inputs_to_cblock, float trans_cblock_to_lblock_buf, float trans_sram_bit)
static float *	alloc_and_load_unsharable_switch_trans (int num_switch, float trans_sram_bit, float R_minW_nmos)
static float *	alloc_and_load_sharable_switch_trans (int num_switch, float trans_sram_bit, float R_minW_nmos, float R_minW_pmos)
static float	trans_per_buf (float Rbuf, float R_minW_nmos, float R_minW_pmos)
static float	trans_per_mux (int num_inputs, float trans_sram_bit, float pass_trans_area)
static float	trans_per_R (float Rtrans, float R_minW_trans)
void	count_routing_transistors (enum e_directionality directionality, int num_switch, t_segment_inf *segment_inf, float R_minW_nmos, float R_minW_pmos)

Function Documentation

static float * alloc_and_load_sharable_switch_trans	(	int	num_switch,
		float	trans_sram_bit,
		float	R_minW_nmos,
		float	R_minW_pmos
	)			`[static]`

Definition at line 605 of file rr_graph_area.c.

00609 {
00610 
00611 /* Loads up an array that says how many transistors are needed to implement  *
00612  * the sharable portion of each switch type.  The SRAM bit of a switch and   *
00613  * the pass transistor (forming either the entire switch or the output part  *
00614  * of a tri-state buffer) are both unsharable.  Only the buffer part of a    *
00615  * buffer switch is sharable.                                                */
00616 
00617     float *sharable_switch_trans, Rbuf;
00618     int i;
00619 
00620     sharable_switch_trans = (float *)my_malloc(num_switch * sizeof(float));
00621 
00622     for(i = 0; i < num_switch; i++)
00623         {
00624 
00625             if(switch_inf[i].buffered == FALSE)
00626                 {
00627                     sharable_switch_trans[i] = 0.;
00628                 }
00629             else
00630                 {               /* Buffer.  Set Rbuf = Rpass = 1/2 Rtotal. */
00631                     Rbuf = switch_inf[i].R / 2.;
00632                     sharable_switch_trans[i] =
00633                         trans_per_buf(Rbuf, R_minW_nmos, R_minW_pmos);
00634                 }
00635         }
00636 
00637     return (sharable_switch_trans);
00638 }

Here is the call graph for this function:

Here is the caller graph for this function:

static float * alloc_and_load_unsharable_switch_trans	(	int	num_switch,
		float	trans_sram_bit,
		float	R_minW_nmos
	)			`[static]`

Definition at line 569 of file rr_graph_area.c.

00572 {
00573 
00574 /* Loads up an array that says how many transistors are needed to implement  *
00575  * the unsharable portion of each switch type.  The SRAM bit of a switch and *
00576  * the pass transistor (forming either the entire switch or the output part  *
00577  * of a tri-state buffer) are both unsharable.                               */
00578 /* Returns a my_malloc'ed array [0..num_switch-1]; it is not freed here.     */
00579     float *unsharable_switch_trans, Rpass;
00580     int i;
00581 
00582     unsharable_switch_trans = (float *)my_malloc(num_switch * sizeof(float));
00583 
00584     for(i = 0; i < num_switch; i++)
00585         {
00586 
00587             if(switch_inf[i].buffered == FALSE)
00588                 {
00589                     Rpass = switch_inf[i].R;
00590                 }
00591             else
00592                 {               /* Buffer.  Set Rpass = Rbuf = 1/2 Rtotal. */
00593                     Rpass = switch_inf[i].R / 2.;
00594                 }
00595             /* Pass transistor sized for Rpass (vs. R_minW_nmos) plus its SRAM bit. */
00596             unsharable_switch_trans[i] = trans_per_R(Rpass, R_minW_nmos) +
00597                 trans_sram_bit;
00598         }
00599 
00600     return (unsharable_switch_trans);
00601 }

Here is the call graph for this function:

Here is the caller graph for this function:

void count_bidir_routing_transistors	(	int	num_switch,
		float	R_minW_nmos,
		float	R_minW_pmos
	)			`[static]`

Definition at line 82 of file rr_graph_area.c.

00085 {
00086 
00087 /* Tri-state buffers are designed as a buffer followed by a pass transistor. *
00088  * I make Rbuffer = Rpass_transistor = 1/2 Rtri-state_buffer.                *
00089  * I make the pull-up and pull-down sides of the buffer the same strength -- *
00090  * i.e. I make the p transistor R_minW_pmos / R_minW_nmos wider than the n   *
00091  * transistor.                                                               *
00092  *                                                                           *
00093  * I generate two area numbers in this routine:  ntrans_sharing and          *
00094  * ntrans_no_sharing.  ntrans_no_sharing exactly reflects what the timing    *
00095  * analyzer, etc. works with -- each switch is a completely self contained   *
00096  * pass transistor or tri-state buffer.  In the case of tri-state buffers    *
00097  * this is rather pessimistic.  The inverter chain part of the buffer (as    *
00098  * opposed to the pass transistor + SRAM output part) can be shared by       *
00099  * several switches in the same location.  Obviously all the switches from   *
00100  * an OPIN can share one buffer.  Also, CHANX and CHANY switches at the same *
00101  * spot (i,j) on a single segment can share a buffer.  For a more realistic  *
00102  * area number I assume all buffered switches from a node that are at the    *
00103  * *same (i,j) location* can share one buffer.  Only the lowest resistance   *
00104  * (largest) buffer is implemented.  In practice, you might want to build    *
00105  * something that is 1.5x or 2x the largest buffer, so this may be a bit     *
00106  * optimistic (but I still think it's pretty reasonable).                    */
00107 
00108 
00109     int *num_inputs_to_cblock;  /* [0..num_rr_nodes-1], but all entries not    */
00110 
00111     /* corresponding to IPINs will be 0.           */
00112 
00113     boolean *cblock_counted;    /* [0..max(nx,ny)] -- 0th element unused. */
00114     float *shared_buffer_trans; /* [0..max(nx,ny)] */
00115     float *unsharable_switch_trans, *sharable_switch_trans;     /* [0..num_switch-1] */
00116 
00117     t_rr_type from_rr_type, to_rr_type;
00118     int from_node, to_node, iedge, num_edges, maxlen;
00119     int iswitch, i, j, iseg, max_inputs_to_cblock;
00120     float input_cblock_trans, shared_opin_buffer_trans;
00121     const float trans_sram_bit = 6.;
00122 
00123 /* Two variables below are the accumulator variables that add up all the    *
00124  * transistors in the routing.  Make doubles so that they don't stop        *
00125  * incrementing once adding a switch makes a change of less than 1 part in  *
00126  * 10^7 to the total.  If this still isn't good enough (adding 1 part in    *
00127  * 10^15 will still be thrown away), compute the transistor count in        *
00128  * "chunks", by adding up inodes 1 to 1000, 1001 to 2000 and then summing   *
00129  * the partial sums together.                                               */
00130 
00131     double ntrans_sharing, ntrans_no_sharing;
00132 
00133 
00134 /* Buffers from the routing to the ipin cblock inputs, and from the ipin    *
00135  * cblock outputs to the logic block, respectively.  Assume minimum size n  *
00136  * transistors, and ptransistors sized to make the pull-up R = pull-down R. */
00137 
00138     float trans_track_to_cblock_buf;
00139     float trans_cblock_to_lblock_buf;
00140 
00141 
00142     ntrans_sharing = 0.;
00143     ntrans_no_sharing = 0.;
00144     max_inputs_to_cblock = 0;
00145 
00146 /* Assume the two buffers below are 4x minimum drive strength (enough to *
00147  * drive a fanout of up to 16 pretty nicely -- should cover a reasonable *
00148  * wiring C plus the fanout).                                            */
00149 
00150     trans_track_to_cblock_buf =
00151         trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos);
00152 
00153     trans_cblock_to_lblock_buf =
00154         trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos);
00155 
00156     num_inputs_to_cblock = (int *)my_calloc(num_rr_nodes, sizeof(int));
00157 
00158     maxlen = max(nx, ny) + 1;
00159     cblock_counted = (boolean *) my_calloc(maxlen, sizeof(boolean));
00160     shared_buffer_trans = (float *)my_calloc(maxlen, sizeof(float));
00161     /* Per-switch-type area tables; both are freed after the node loop. */
00162     unsharable_switch_trans =
00163         alloc_and_load_unsharable_switch_trans(num_switch, trans_sram_bit,
00164                                                R_minW_nmos);
00165 
00166     sharable_switch_trans =
00167         alloc_and_load_sharable_switch_trans(num_switch, trans_sram_bit,
00168                                              R_minW_nmos, R_minW_pmos);
00169 
00170     for(from_node = 0; from_node < num_rr_nodes; from_node++)
00171         {
00172 
00173             from_rr_type = rr_node[from_node].type;
00174 
00175             switch (from_rr_type)
00176                 {
00177 
00178                 case CHANX:
00179                 case CHANY:
00180                     num_edges = rr_node[from_node].num_edges;
00181 
00182                     for(iedge = 0; iedge < num_edges; iedge++)
00183                         {
00184 
00185                             to_node = rr_node[from_node].edges[iedge];
00186                             to_rr_type = rr_node[to_node].type;
00187 
00188                             switch (to_rr_type)
00189                                 {
00190 
00191                                 case CHANX:
00192                                 case CHANY:
00193                                     iswitch =
00194                                         rr_node[from_node].switches[iedge];
00195 
00196                                     if(switch_inf[iswitch].buffered)
00197                                         {
00198                                             iseg =
00199                                                 seg_index_of_sblock(from_node,
00200                                                                     to_node);
00201                                             shared_buffer_trans[iseg] =
00202                                                 max(shared_buffer_trans[iseg],
00203                                                     sharable_switch_trans
00204                                                     [iswitch]);
00205                                             /* No-sharing total takes the whole switch; the sharing total adds only the largest buffer per slot, after the edge loop. */
00206                                             ntrans_no_sharing +=
00207                                                 unsharable_switch_trans
00208                                                 [iswitch] +
00209                                                 sharable_switch_trans
00210                                                 [iswitch];
00211                                             ntrans_sharing +=
00212                                                 unsharable_switch_trans
00213                                                 [iswitch];
00214                                         }
00215                                     else if(from_node < to_node)
00216                                         {
00217 
00218                                             /* Pass transistor shared by two edges -- only count once.  *
00219                                              * Also, no part of a pass transistor is sharable.          */
00220 
00221                                             ntrans_no_sharing +=
00222                                                 unsharable_switch_trans
00223                                                 [iswitch];
00224                                             ntrans_sharing +=
00225                                                 unsharable_switch_trans
00226                                                 [iswitch];
00227                                         }
00228                                     break;
00229 
00230                                 case IPIN:
00231                                     num_inputs_to_cblock[to_node]++;
00232                                     max_inputs_to_cblock =
00233                                         max(max_inputs_to_cblock,
00234                                             num_inputs_to_cblock[to_node]);
00235 
00236                                     iseg =
00237                                         seg_index_of_cblock(from_rr_type,
00238                                                             to_node);
00239                                     /* Count the track-to-cblock buffer once per cblock slot of this node. */
00240                                     if(cblock_counted[iseg] == FALSE)
00241                                         {
00242                                             cblock_counted[iseg] = TRUE;
00243                                             ntrans_sharing +=
00244                                                 trans_track_to_cblock_buf;
00245                                             ntrans_no_sharing +=
00246                                                 trans_track_to_cblock_buf;
00247                                         }
00248                                     break;
00249 
00250                                 default:
00251                                     printf
00252                                         ("Error in count_routing_transistors:  Unexpected \n"
00253                                          "connection from node %d (type %d) to node %d (type %d).\n",
00254                                          from_node, from_rr_type, to_node,
00255                                          to_rr_type);
00256                                     exit(1);
00257                                     break;
00258 
00259                                 }       /* End switch on to_rr_type. */
00260 
00261                         }       /* End for each edge. */
00262 
00263                     /* Now add in the shared buffer transistors, and reset some flags. */
00264                     /* Buffer slots span low-1..high; cblock flags span low..high.     */
00265                     if(from_rr_type == CHANX)
00266                         {
00267                             for(i = rr_node[from_node].xlow - 1;
00268                                 i <= rr_node[from_node].xhigh; i++)
00269                                 {
00270                                     ntrans_sharing += shared_buffer_trans[i];
00271                                     shared_buffer_trans[i] = 0.;
00272                                 }
00273 
00274                             for(i = rr_node[from_node].xlow;
00275                                 i <= rr_node[from_node].xhigh; i++)
00276                                 cblock_counted[i] = FALSE;
00277 
00278                         }
00279                     else
00280                         {       /* CHANY */
00281                             for(j = rr_node[from_node].ylow - 1;
00282                                 j <= rr_node[from_node].yhigh; j++)
00283                                 {
00284                                     ntrans_sharing += shared_buffer_trans[j];
00285                                     shared_buffer_trans[j] = 0.;
00286                                 }
00287 
00288                             for(j = rr_node[from_node].ylow;
00289                                 j <= rr_node[from_node].yhigh; j++)
00290                                 cblock_counted[j] = FALSE;
00291 
00292                         }
00293                     break;
00294 
00295                 case OPIN:
00296                     num_edges = rr_node[from_node].num_edges;
00297                     shared_opin_buffer_trans = 0.;
00298 
00299                     for(iedge = 0; iedge < num_edges; iedge++)
00300                         {
00301                             iswitch = rr_node[from_node].switches[iedge];
00302                             ntrans_no_sharing +=
00303                                 unsharable_switch_trans[iswitch] +
00304                                 sharable_switch_trans[iswitch];
00305                             ntrans_sharing +=
00306                                 unsharable_switch_trans[iswitch];
00307                             /* Track the largest sharable (buffer) part among this OPIN's switches. */
00308                             shared_opin_buffer_trans =
00309                                 max(shared_opin_buffer_trans,
00310                                     sharable_switch_trans[iswitch]);
00311                         }
00312 
00313                     ntrans_sharing += shared_opin_buffer_trans;
00314                     break;
00315 
00316                 default:
00317                     break;
00318 
00319                 }               /* End switch on from_rr_type */
00320         }                       /* End for all nodes */
00321 
00322     free(cblock_counted);
00323     free(shared_buffer_trans);
00324     free(unsharable_switch_trans);
00325     free(sharable_switch_trans);
00326 
00327 /* Now add in the input connection block transistors. */
00328 
00329     input_cblock_trans = get_cblock_trans(num_inputs_to_cblock,
00330                                           max_inputs_to_cblock,
00331                                           trans_cblock_to_lblock_buf,
00332                                           trans_sram_bit);
00333 
00334     free(num_inputs_to_cblock);
00335 
00336     ntrans_sharing += input_cblock_trans;
00337     ntrans_no_sharing += input_cblock_trans;
00338 
00339     printf("\nRouting area (in minimum width transistor areas):\n");
00340     printf
00341         ("Assuming no buffer sharing (pessimistic). Total: %#g  Per logic tile: "
00342          "%#g\n", ntrans_no_sharing, ntrans_no_sharing / (float)(nx * ny));
00343     printf
00344         ("Assuming buffer sharing (slightly optimistic). Total: %#g  Per logic tile: "
00345          "%#g\n\n", ntrans_sharing, ntrans_sharing / (float)(nx * ny));
00346 }

Here is the call graph for this function:

Here is the caller graph for this function:

void count_routing_transistors	(	enum e_directionality	directionality,
		int	num_switch,
		t_segment_inf *	segment_inf,
		float	R_minW_nmos,
		float	R_minW_pmos
	)

Definition at line 50 of file rr_graph_area.c.

00055 {
00056 
00057 /* Counts how many transistors are needed to implement the FPGA routing      *
00058  * resources.  Call this only when an rr_graph exists.  It does not count    *
00059  * the transistors used in logic blocks, but it counts the transistors in    *
00060  * the input connection block multiplexers and in the output pin drivers and *
00061  * pass transistors.  NB:  this routine assumes pass transistors always      *
00062  * generate two edges (one forward, one backward) between two nodes.         *
00063  * Physically, this is what happens -- make sure your rr_graph does it.      *
00064  *                                                                           *
00065  * I assume a minimum width transistor takes 1 unit of area.  A double-width *
00066  * transistor takes twice the diffusion width, but the same spacing, so      *
00067  * I assume it takes 1.5x the area of a minimum-width transistor.            */
00068     if(directionality == BI_DIRECTIONAL)
00069         {
00070             count_bidir_routing_transistors(num_switch, R_minW_nmos,
00071                                             R_minW_pmos);
00072         }
00073     else
00074         {
00075             assert(directionality == UNI_DIRECTIONAL);
00076             count_unidir_routing_transistors(segment_inf, R_minW_nmos,
00077                                              R_minW_pmos);
00078         }
00079 }

Here is the call graph for this function:

Here is the caller graph for this function:

void count_unidir_routing_transistors	(	t_segment_inf *	segment_inf,
		float	R_minW_nmos,
		float	R_minW_pmos
	)			`[static]`

Definition at line 349 of file rr_graph_area.c.

00352 {
00353     boolean *cblock_counted;    /* [0..max(nx,ny)] -- 0th element unused. */
00354     int *num_inputs_to_cblock;  /* [0..num_rr_nodes-1], but all entries not    */
00355 
00356     /* corresponding to IPINs will be 0.           */
00357 
00358     t_rr_type from_rr_type, to_rr_type;
00359     int i, j, iseg, from_node, to_node, iedge, num_edges, maxlen;
00360     int max_inputs_to_cblock, cost_index, seg_type, switch_type;
00361     float input_cblock_trans;
00362     const float trans_sram_bit = 6.;
00363     /* Unidirectional count: per CHANX/CHANY node, one input mux plus one driver buffer, then the connection blocks; prints the total. */
00364 /* Two variables below are the accumulator variables that add up all the    *
00365  * transistors in the routing.  Make doubles so that they don't stop        *
00366  * incrementing once adding a switch makes a change of less than 1 part in  *
00367  * 10^7 to the total.  If this still isn't good enough (adding 1 part in    *
00368  * 10^15 will still be thrown away), compute the transistor count in        *
00369  * "chunks", by adding up inodes 1 to 1000, 1001 to 2000 and then summing   *
00370  * the partial sums together.                                               */
00371 
00372     double ntrans;
00373 
00374 
00375 /* Buffers from the routing to the ipin cblock inputs, and from the ipin    *
00376  * cblock outputs to the logic block, respectively.  Assume minimum size n  *
00377  * transistors, and ptransistors sized to make the pull-up R = pull-down R. */
00378 
00379     float trans_track_to_cblock_buf;
00380     float trans_cblock_to_lblock_buf;
00381 
00382     max_inputs_to_cblock = 0;
00383 
00384 /* Assume the two buffers below are 4x minimum drive strength (enough to *
00385  * drive a fanout of up to 16 pretty nicely -- should cover a reasonable *
00386  * wiring C plus the fanout).                                            */
00387 
00388     trans_track_to_cblock_buf =
00389         trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos);
00390 
00391     trans_cblock_to_lblock_buf =
00392         trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos);
00393 
00394     num_inputs_to_cblock = (int *)my_calloc(num_rr_nodes, sizeof(int));
00395     maxlen = max(nx, ny) + 1;
00396     cblock_counted = (boolean *) my_calloc(maxlen, sizeof(boolean));
00397 
00398     ntrans = 0;
00399     for(from_node = 0; from_node < num_rr_nodes; from_node++)
00400         {
00401 
00402             from_rr_type = rr_node[from_node].type;
00403 
00404             switch (from_rr_type)
00405                 {
00406 
00407                 case CHANX:
00408                 case CHANY:
00409                     num_edges = rr_node[from_node].num_edges;
00410                     cost_index = rr_node[from_node].cost_index;
00411                     seg_type = rr_indexed_data[cost_index].seg_index;
00412                     switch_type = segment_inf[seg_type].wire_switch;
00413                     assert(segment_inf[seg_type].wire_switch ==
00414                            segment_inf[seg_type].opin_switch);
00415                     assert(switch_inf[switch_type].mux_trans_size >= 1);        /* can't be smaller than min sized transistor */
00416 
00417                         assert(rr_node[from_node].num_opin_drivers == 0); /* unidir has no opin or wire switches */
00418                         assert(rr_node[from_node].num_wire_drivers == 0); /* unidir has no opin or wire switches */
00419 
00420                     /* Each wire segment begins with a multiplexer followed by a driver for unidirectional */
00421                     /* Add up area of multiplexer */
00422                     ntrans +=
00423                                 trans_per_mux(rr_node[from_node].fan_in,
00424                                       trans_sram_bit,
00425                                       switch_inf[switch_type].mux_trans_size);
00426 
00427                     /* Add up area of buffer: computed from the switch R when buf_size is 0, else buf_size is used directly */
00428                     if(switch_inf[switch_type].buf_size == 0)
00429                         {
00430                             ntrans +=
00431                                 trans_per_buf(switch_inf[switch_type].R,
00432                                               R_minW_nmos, R_minW_pmos);
00433                         }
00434                     else
00435                         {
00436                             ntrans += switch_inf[switch_type].buf_size;
00437                         }
00438                     /* Wire-to-wire edges add nothing below (each wire's mux is sized from its own fan_in above); only IPIN edges are counted. */
00439                     for(iedge = 0; iedge < num_edges; iedge++)
00440                         {
00441 
00442                             to_node = rr_node[from_node].edges[iedge];
00443                             to_rr_type = rr_node[to_node].type;
00444 
00445                             switch (to_rr_type)
00446                                 {
00447 
00448                                 case CHANX:
00449                                 case CHANY:
00450                                     break;
00451 
00452                                 case IPIN:
00453                                     num_inputs_to_cblock[to_node]++;
00454                                     max_inputs_to_cblock =
00455                                         max(max_inputs_to_cblock,
00456                                             num_inputs_to_cblock[to_node]);
00457                                     iseg =
00458                                         seg_index_of_cblock(from_rr_type,
00459                                                             to_node);
00460                                     /* Count the track-to-cblock buffer once per cblock slot of this node. */
00461                                     if(cblock_counted[iseg] == FALSE)
00462                                         {
00463                                             cblock_counted[iseg] = TRUE;
00464                                             ntrans +=
00465                                                 trans_track_to_cblock_buf;
00466                                         }
00467                                     break;
00468 
00469                                 default:
00470                                     printf
00471                                         ("Error in count_routing_transistors:  Unexpected \n"
00472                                          "connection from node %d (type %d) to node %d (type %d).\n",
00473                                          from_node, from_rr_type, to_node,
00474                                          to_rr_type);
00475                                     exit(1);
00476                                     break;
00477 
00478                                 }       /* End switch on to_rr_type. */
00479 
00480                         }       /* End for each edge. */
00481 
00482                     /* Reset some flags */
00483                     if(from_rr_type == CHANX)
00484                         {
00485                             for(i = rr_node[from_node].xlow;
00486                                 i <= rr_node[from_node].xhigh; i++)
00487                                 cblock_counted[i] = FALSE;
00488 
00489                         }
00490                     else
00491                         {       /* CHANY */
00492                             for(j = rr_node[from_node].ylow;
00493                                 j <= rr_node[from_node].yhigh; j++)
00494                                 cblock_counted[j] = FALSE;
00495 
00496                         }
00497                     break;
00498                 case OPIN:
00499                     break;
00500 
00501                 default:
00502                     break;
00503 
00504                 }               /* End switch on from_rr_type */
00505         }                       /* End for all nodes */
00506 
00507     /* Now add in the input connection block transistors. */
00508 
00509     input_cblock_trans = get_cblock_trans(num_inputs_to_cblock,
00510                                           max_inputs_to_cblock,
00511                                           trans_cblock_to_lblock_buf,
00512                                           trans_sram_bit);
00513 
00514     free(cblock_counted);
00515     free(num_inputs_to_cblock);
00516 
00517     ntrans += input_cblock_trans;
00518 
00519     printf("\nRouting area (in minimum width transistor areas):\n");
00520     printf("Total Routing Area: %#g  Per logic tile: %#g\n", ntrans,
00521            ntrans / (float)(nx * ny));
00522 }

Here is the call graph for this function:

Here is the caller graph for this function:

static float get_cblock_trans	(	int *	num_inputs_to_cblock,
		int	max_inputs_to_cblock,
		float	trans_cblock_to_lblock_buf,
		float	trans_sram_bit
	)			`[static]`

Definition at line 526 of file rr_graph_area.c.

00530 {
00531 
00532 /* Computes the transistors in the input connection block multiplexers and   *
00533  * the buffers from connection block outputs to the logic block input pins.  *
00534  * For speed, I precompute the number of transistors in the multiplexers of  *
00535  * interest.  Returns the sum over all num_rr_nodes entries.                 */
00536 
00537     float *trans_per_cblock;    /* [0..max_inputs_to_cblock] */
00538     float trans_count;
00539     int i, num_inputs;
00540 
00541     trans_per_cblock = (float *)my_malloc((max_inputs_to_cblock + 1) *
00542                                           sizeof(float));
00543 
00544     trans_per_cblock[0] = 0.;   /* i.e., not an IPIN or no inputs */
00545 
00546 /* With one or more inputs, add the mux and output buffer.  I add the output *
00547  * buffer even when the number of inputs = 1 (i.e. no mux) because I assume  *
00548  * I need the drivability just for metal capacitance.                        */
00549 
00550     for(i = 1; i <= max_inputs_to_cblock; i++)
00551         trans_per_cblock[i] =
00552             trans_per_mux(i, trans_sram_bit,
00553                           ipin_mux_trans_size) + trans_cblock_to_lblock_buf;
00554     /* NOTE(review): the sum below is kept in a float, while the callers use double accumulators -- confirm precision is adequate. */
00555     trans_count = 0.;
00556     /* The lookup assumes every num_inputs_to_cblock[i] is within 0..max_inputs_to_cblock. */
00557     for(i = 0; i < num_rr_nodes; i++)
00558         {
00559             num_inputs = num_inputs_to_cblock[i];
00560             trans_count += trans_per_cblock[num_inputs];
00561         }
00562 
00563     free(trans_per_cblock);
00564     return (trans_count);
00565 }

Here is the call graph for this function:

Here is the caller graph for this function:

static float trans_per_buf	(	float	Rbuf,
		float	R_minW_nmos,
		float	R_minW_pmos
	)			`[static]`

Definition at line 642 of file rr_graph_area.c.

00645 {
00646 
00647 /* Returns the number of minimum width transistor area equivalents needed to *
00648  * implement this buffer.  Assumes a stage ratio of 4, and equal strength    *
00649  * pull-up and pull-down paths.                                              */
00650 
00651     int num_stage, istage;
00652     float trans_count, stage_ratio, Rstage;
00653     /* Weak buffer (Rbuf > 0.6 * R_minW_nmos) or non-positive Rbuf: one n plus one p transistor. */
00654     if(Rbuf > 0.6 * R_minW_nmos || Rbuf <= 0.)
00655         {                       /* Use a single-stage buffer */
00656             trans_count = trans_per_R(Rbuf, R_minW_nmos) + trans_per_R(Rbuf,
00657                                                                        R_minW_pmos);
00658         }
00659     else
00660         {                       /* Use a multi-stage buffer */
00661 
00662             /* Target stage ratio = 4.  1 minimum width buffer, then num_stage bigger *
00663              * ones.                                                                  */
00664 
00665             num_stage = nint(log10(R_minW_nmos / Rbuf) / log10(4.));
00666             num_stage = max(num_stage, 1);
00667             stage_ratio = pow(R_minW_nmos / Rbuf, 1. / (float)num_stage);
00668             /* Stage 0 uses R = R_minW_nmos; each later stage divides R by stage_ratio (num_stage + 1 stages in total). */
00669             Rstage = R_minW_nmos;
00670             trans_count = 0.;
00671 
00672             for(istage = 0; istage <= num_stage; istage++)
00673                 {
00674                     trans_count +=
00675                         trans_per_R(Rstage, R_minW_nmos) + trans_per_R(Rstage,
00676                                                                        R_minW_pmos);
00677                     Rstage /= stage_ratio;
00678                 }
00679         }
00680 
00681     return (trans_count);
00682 }

Here is the call graph for this function:

Here is the caller graph for this function:

static float trans_per_mux	(	int	num_inputs,
		float	trans_sram_bit,
		float	pass_trans_area
	)			`[static]`

Definition at line 686 of file rr_graph_area.c.

00689 {
00690 
00691 /* Returns the number of transistors needed to build a pass transistor mux. *
00692  * DOES NOT include input buffers or any output buffer.                     *
00693  * Attempts to select a smart multiplexer size depending on the number of   *
00694  * inputs.  For multiplexers with 4 or fewer inputs one level is used; more *
00695  * inputs use two levels.                                                   */
00696     float ntrans, sram_trans, pass_trans;
00697     int num_second_stage_trans;
00698     /* 0 or 1 inputs: no mux is needed, so no transistors are counted. */
00699     if(num_inputs <= 1)
00700         {
00701             return (0);
00702         }
00703     else if(num_inputs == 2)
00704         {       /* Two pass transistors controlled by a single SRAM bit */
00705             pass_trans = 2 * pass_trans_area;
00706             sram_trans = 1 * trans_sram_bit;
00707         }
00708     else if(num_inputs <= 4)
00709         {
00710             /* One-hot encoding */
00711             pass_trans = num_inputs * pass_trans_area;
00712             sram_trans = num_inputs * trans_sram_bit;
00713         }
00714     else
00715         {
00716             /* This is a large multiplexer so design it using a two-level multiplexer.  *
00717              * The + 0.00001 makes sure exact square roots don't get rounded down to    *
00718              * one lower level.                                                         */
00719             num_second_stage_trans = floor(sqrt(num_inputs) + 0.00001);
00720             pass_trans =
00721                 (num_inputs + num_second_stage_trans) * pass_trans_area;
00722             sram_trans =
00723                 (ceil((float)num_inputs / num_second_stage_trans - 0.00001) +
00724                  num_second_stage_trans) * trans_sram_bit;
00725             if(num_second_stage_trans == 2)
00726                 {
00727                     /* Can use one-bit instead of a two-bit one-hot encoding for the second stage */
00728                     /* Eliminates one sram bit counted earlier */
00729                     sram_trans -= 1 * trans_sram_bit;
00730                 }
00731         }
00732 
00733     ntrans = pass_trans + sram_trans;
00734     return (ntrans);
00735 }

Here is the caller graph for this function:

static float trans_per_R	(	float	Rtrans,
		float	R_minW_trans
	)			`[static]`

Definition at line 739 of file rr_graph_area.c.

00741 {
00742 
00743 /* Returns the number of minimum width transistor area equivalents needed    *
00744  * to make a transistor with Rtrans, given that the resistance of a minimum  *
00745  * width transistor of this type is R_minW_trans.                            */
00746 
00747     float trans_area;
00748 
00749     if(Rtrans <= 0.)            /* Assume resistances are nonsense -- use min. width */
00750         return (1.);
00751     /* A transistor at or above the minimum-width resistance is counted as minimum width. */
00752     if(Rtrans >= R_minW_trans)
00753         return (1.);
00754 
00755 /* Area = minimum width area (1) + 0.5 for each additional unit of width.  *
00756  * The 50% factor takes into account the "overlapping" that occurs in      *
00757  * horizontally-paralleled transistors, and the need for only one spacing, *
00758  * not two (i.e. two min W transistors need two spaces; a 2W transistor    *
00759  * needs only 1).                                                          */
00760     /* Example: Rtrans = R_minW_trans / 2 (double width) gives 0.5 * 2 + 0.5 = 1.5. */
00761     trans_area = 0.5 * R_minW_trans / Rtrans + 0.5;
00762     return (trans_area);
00763 }

Here is the caller graph for this function:

SRC/rr_graph_area.c File Reference

Functions

Function Documentation