#include <math.h>
#include "util.h"
#include "vpr_types.h"
#include <assert.h>
#include "globals.h"
#include "rr_graph_util.h"
#include "rr_graph_area.h"
Go to the source code of this file.
Functions | |
static void | count_bidir_routing_transistors (int num_switch, float R_minW_nmos, float R_minW_pmos) |
static void | count_unidir_routing_transistors (t_segment_inf *segment_inf, float R_minW_nmos, float R_minW_pmos) |
static float | get_cblock_trans (int *num_inputs_to_cblock, int max_inputs_to_cblock, float trans_cblock_to_lblock_buf, float trans_sram_bit) |
static float * | alloc_and_load_unsharable_switch_trans (int num_switch, float trans_sram_bit, float R_minW_nmos) |
static float * | alloc_and_load_sharable_switch_trans (int num_switch, float trans_sram_bit, float R_minW_nmos, float R_minW_pmos) |
static float | trans_per_buf (float Rbuf, float R_minW_nmos, float R_minW_pmos) |
static float | trans_per_mux (int num_inputs, float trans_sram_bit, float pass_trans_area) |
static float | trans_per_R (float Rtrans, float R_minW_trans) |
void | count_routing_transistors (enum e_directionality directionality, int num_switch, t_segment_inf *segment_inf, float R_minW_nmos, float R_minW_pmos) |
static float * alloc_and_load_sharable_switch_trans | ( | int | num_switch, | |
float | trans_sram_bit, | |||
float | R_minW_nmos, | |||
float | R_minW_pmos | |||
) | [static] |
Definition at line 605 of file rr_graph_area.c.
00609 { 00610 00611 /* Loads up an array that says how many transistor are needed to implement * 00612 * the sharable portion of each switch type. The SRAM bit of a switch and * 00613 * the pass transistor (forming either the entire switch or the output part * 00614 * of a tri-state buffer) are both unsharable. Only the buffer part of a * 00615 * buffer switch is sharable. */ 00616 00617 float *sharable_switch_trans, Rbuf; 00618 int i; 00619 00620 sharable_switch_trans = (float *)my_malloc(num_switch * sizeof(float)); 00621 00622 for(i = 0; i < num_switch; i++) 00623 { 00624 00625 if(switch_inf[i].buffered == FALSE) 00626 { 00627 sharable_switch_trans[i] = 0.; 00628 } 00629 else 00630 { /* Buffer. Set Rbuf = Rpass = 1/2 Rtotal. */ 00631 Rbuf = switch_inf[i].R / 2.; 00632 sharable_switch_trans[i] = 00633 trans_per_buf(Rbuf, R_minW_nmos, R_minW_pmos); 00634 } 00635 } 00636 00637 return (sharable_switch_trans); 00638 }
static float * alloc_and_load_unsharable_switch_trans | ( | int | num_switch, | |
float | trans_sram_bit, | |||
float | R_minW_nmos | |||
) | [static] |
Definition at line 569 of file rr_graph_area.c.
00572 { 00573 00574 /* Loads up an array that says how many transistors are needed to implement * 00575 * the unsharable portion of each switch type. The SRAM bit of a switch and * 00576 * the pass transistor (forming either the entire switch or the output part * 00577 * of a tri-state buffer) are both unsharable. */ 00578 00579 float *unsharable_switch_trans, Rpass; 00580 int i; 00581 00582 unsharable_switch_trans = (float *)my_malloc(num_switch * sizeof(float)); 00583 00584 for(i = 0; i < num_switch; i++) 00585 { 00586 00587 if(switch_inf[i].buffered == FALSE) 00588 { 00589 Rpass = switch_inf[i].R; 00590 } 00591 else 00592 { /* Buffer. Set Rpass = Rbuf = 1/2 Rtotal. */ 00593 Rpass = switch_inf[i].R / 2.; 00594 } 00595 00596 unsharable_switch_trans[i] = trans_per_R(Rpass, R_minW_nmos) + 00597 trans_sram_bit; 00598 } 00599 00600 return (unsharable_switch_trans); 00601 }
void count_bidir_routing_transistors | ( | int | num_switch, | |
float | R_minW_nmos, | |||
float | R_minW_pmos | |||
) | [static] |
Definition at line 82 of file rr_graph_area.c.
00085 { 00086 00087 /* Tri-state buffers are designed as a buffer followed by a pass transistor. * 00088 * I make Rbuffer = Rpass_transitor = 1/2 Rtri-state_buffer. * 00089 * I make the pull-up and pull-down sides of the buffer the same strength -- * 00090 * i.e. I make the p transistor R_minW_pmos / R_minW_nmos wider than the n * 00091 * transistor. * 00092 * * 00093 * I generate two area numbers in this routine: ntrans_sharing and * 00094 * ntrans_no_sharing. ntrans_sharing exactly reflects what the timing * 00095 * analyzer, etc. works with -- each switch is a completely self contained * 00096 * pass transistor or tri-state buffer. In the case of tri-state buffers * 00097 * this is rather pessimisitic. The inverter chain part of the buffer (as * 00098 * opposed to the pass transistor + SRAM output part) can be shared by * 00099 * several switches in the same location. Obviously all the switches from * 00100 * an OPIN can share one buffer. Also, CHANX and CHANY switches at the same * 00101 * spot (i,j) on a single segment can share a buffer. For a more realistic * 00102 * area number I assume all buffered switches from a node that are at the * 00103 * *same (i,j) location* can share one buffer. Only the lowest resistance * 00104 * (largest) buffer is implemented. In practice, you might want to build * 00105 * something that is 1.5x or 2x the largest buffer, so this may be a bit * 00106 * optimistic (but I still think it's pretty reasonable). */ 00107 00108 00109 int *num_inputs_to_cblock; /* [0..num_rr_nodes-1], but all entries not */ 00110 00111 /* corresponding to IPINs will be 0. */ 00112 00113 boolean *cblock_counted; /* [0..max(nx,ny)] -- 0th element unused. 
*/ 00114 float *shared_buffer_trans; /* [0..max_nx,ny)] */ 00115 float *unsharable_switch_trans, *sharable_switch_trans; /* [0..num_switch-1] */ 00116 00117 t_rr_type from_rr_type, to_rr_type; 00118 int from_node, to_node, iedge, num_edges, maxlen; 00119 int iswitch, i, j, iseg, max_inputs_to_cblock; 00120 float input_cblock_trans, shared_opin_buffer_trans; 00121 const float trans_sram_bit = 6.; 00122 00123 /* Two variables below are the accumulator variables that add up all the * 00124 * transistors in the routing. Make doubles so that they don't stop * 00125 * incrementing once adding a switch makes a change of less than 1 part in * 00126 * 10^7 to the total. If this still isn't good enough (adding 1 part in * 00127 * 10^15 will still be thrown away), compute the transistor count in * 00128 * "chunks", by adding up inodes 1 to 1000, 1001 to 2000 and then summing * 00129 * the partial sums together. */ 00130 00131 double ntrans_sharing, ntrans_no_sharing; 00132 00133 00134 /* Buffers from the routing to the ipin cblock inputs, and from the ipin * 00135 * cblock outputs to the logic block, respectively. Assume minimum size n * 00136 * transistors, and ptransistors sized to make the pull-up R = pull-down R. */ 00137 00138 float trans_track_to_cblock_buf; 00139 float trans_cblock_to_lblock_buf; 00140 00141 00142 ntrans_sharing = 0.; 00143 ntrans_no_sharing = 0.; 00144 max_inputs_to_cblock = 0; 00145 00146 /* Assume the two buffers below are 4x minimum drive strength (enough to * 00147 * drive a fanout of up to 16 pretty nicely -- should cover a reasonable * 00148 * wiring C plus the fanout. 
*/ 00149 00150 trans_track_to_cblock_buf = 00151 trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos); 00152 00153 trans_cblock_to_lblock_buf = 00154 trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos); 00155 00156 num_inputs_to_cblock = (int *)my_calloc(num_rr_nodes, sizeof(int)); 00157 00158 maxlen = max(nx, ny) + 1; 00159 cblock_counted = (boolean *) my_calloc(maxlen, sizeof(boolean)); 00160 shared_buffer_trans = (float *)my_calloc(maxlen, sizeof(float)); 00161 00162 unsharable_switch_trans = 00163 alloc_and_load_unsharable_switch_trans(num_switch, trans_sram_bit, 00164 R_minW_nmos); 00165 00166 sharable_switch_trans = 00167 alloc_and_load_sharable_switch_trans(num_switch, trans_sram_bit, 00168 R_minW_nmos, R_minW_pmos); 00169 00170 for(from_node = 0; from_node < num_rr_nodes; from_node++) 00171 { 00172 00173 from_rr_type = rr_node[from_node].type; 00174 00175 switch (from_rr_type) 00176 { 00177 00178 case CHANX: 00179 case CHANY: 00180 num_edges = rr_node[from_node].num_edges; 00181 00182 for(iedge = 0; iedge < num_edges; iedge++) 00183 { 00184 00185 to_node = rr_node[from_node].edges[iedge]; 00186 to_rr_type = rr_node[to_node].type; 00187 00188 switch (to_rr_type) 00189 { 00190 00191 case CHANX: 00192 case CHANY: 00193 iswitch = 00194 rr_node[from_node].switches[iedge]; 00195 00196 if(switch_inf[iswitch].buffered) 00197 { 00198 iseg = 00199 seg_index_of_sblock(from_node, 00200 to_node); 00201 shared_buffer_trans[iseg] = 00202 max(shared_buffer_trans[iseg], 00203 sharable_switch_trans 00204 [iswitch]); 00205 00206 ntrans_no_sharing += 00207 unsharable_switch_trans 00208 [iswitch] + 00209 sharable_switch_trans 00210 [iswitch]; 00211 ntrans_sharing += 00212 unsharable_switch_trans 00213 [iswitch]; 00214 } 00215 else if(from_node < to_node) 00216 { 00217 00218 /* Pass transistor shared by two edges -- only count once. * 00219 * Also, no part of a pass transistor is sharable. 
*/ 00220 00221 ntrans_no_sharing += 00222 unsharable_switch_trans 00223 [iswitch]; 00224 ntrans_sharing += 00225 unsharable_switch_trans 00226 [iswitch]; 00227 } 00228 break; 00229 00230 case IPIN: 00231 num_inputs_to_cblock[to_node]++; 00232 max_inputs_to_cblock = 00233 max(max_inputs_to_cblock, 00234 num_inputs_to_cblock[to_node]); 00235 00236 iseg = 00237 seg_index_of_cblock(from_rr_type, 00238 to_node); 00239 00240 if(cblock_counted[iseg] == FALSE) 00241 { 00242 cblock_counted[iseg] = TRUE; 00243 ntrans_sharing += 00244 trans_track_to_cblock_buf; 00245 ntrans_no_sharing += 00246 trans_track_to_cblock_buf; 00247 } 00248 break; 00249 00250 default: 00251 printf 00252 ("Error in count_routing_transistors: Unexpected \n" 00253 "connection from node %d (type %d) to node %d (type %d).\n", 00254 from_node, from_rr_type, to_node, 00255 to_rr_type); 00256 exit(1); 00257 break; 00258 00259 } /* End switch on to_rr_type. */ 00260 00261 } /* End for each edge. */ 00262 00263 /* Now add in the shared buffer transistors, and reset some flags. 
*/ 00264 00265 if(from_rr_type == CHANX) 00266 { 00267 for(i = rr_node[from_node].xlow - 1; 00268 i <= rr_node[from_node].xhigh; i++) 00269 { 00270 ntrans_sharing += shared_buffer_trans[i]; 00271 shared_buffer_trans[i] = 0.; 00272 } 00273 00274 for(i = rr_node[from_node].xlow; 00275 i <= rr_node[from_node].xhigh; i++) 00276 cblock_counted[i] = FALSE; 00277 00278 } 00279 else 00280 { /* CHANY */ 00281 for(j = rr_node[from_node].ylow - 1; 00282 j <= rr_node[from_node].yhigh; j++) 00283 { 00284 ntrans_sharing += shared_buffer_trans[j]; 00285 shared_buffer_trans[j] = 0.; 00286 } 00287 00288 for(j = rr_node[from_node].ylow; 00289 j <= rr_node[from_node].yhigh; j++) 00290 cblock_counted[j] = FALSE; 00291 00292 } 00293 break; 00294 00295 case OPIN: 00296 num_edges = rr_node[from_node].num_edges; 00297 shared_opin_buffer_trans = 0.; 00298 00299 for(iedge = 0; iedge < num_edges; iedge++) 00300 { 00301 iswitch = rr_node[from_node].switches[iedge]; 00302 ntrans_no_sharing += 00303 unsharable_switch_trans[iswitch] + 00304 sharable_switch_trans[iswitch]; 00305 ntrans_sharing += 00306 unsharable_switch_trans[iswitch]; 00307 00308 shared_opin_buffer_trans = 00309 max(shared_opin_buffer_trans, 00310 sharable_switch_trans[iswitch]); 00311 } 00312 00313 ntrans_sharing += shared_opin_buffer_trans; 00314 break; 00315 00316 default: 00317 break; 00318 00319 } /* End switch on from_rr_type */ 00320 } /* End for all nodes */ 00321 00322 free(cblock_counted); 00323 free(shared_buffer_trans); 00324 free(unsharable_switch_trans); 00325 free(sharable_switch_trans); 00326 00327 /* Now add in the input connection block transistors. 
*/ 00328 00329 input_cblock_trans = get_cblock_trans(num_inputs_to_cblock, 00330 max_inputs_to_cblock, 00331 trans_cblock_to_lblock_buf, 00332 trans_sram_bit); 00333 00334 free(num_inputs_to_cblock); 00335 00336 ntrans_sharing += input_cblock_trans; 00337 ntrans_no_sharing += input_cblock_trans; 00338 00339 printf("\nRouting area (in minimum width transistor areas):\n"); 00340 printf 00341 ("Assuming no buffer sharing (pessimistic). Total: %#g Per logic tile: " 00342 "%#g\n", ntrans_no_sharing, ntrans_no_sharing / (float)(nx * ny)); 00343 printf 00344 ("Assuming buffer sharing (slightly optimistic). Total: %#g Per logic tile: " 00345 "%#g\n\n", ntrans_sharing, ntrans_sharing / (float)(nx * ny)); 00346 }
void count_routing_transistors | ( | enum e_directionality | directionality, | |
int | num_switch, | |||
t_segment_inf * | segment_inf, | |||
float | R_minW_nmos, | |||
float | R_minW_pmos | |||
) |
Definition at line 50 of file rr_graph_area.c.
00055 { 00056 00057 /* Counts how many transistors are needed to implement the FPGA routing * 00058 * resources. Call this only when an rr_graph exists. It does not count * 00059 * the transistors used in logic blocks, but it counts the transistors in * 00060 * the input connection block multiplexers and in the output pin drivers and * 00061 * pass transistors. NB: this routine assumes pass transistors always * 00062 * generate two edges (one forward, one backward) between two nodes. * 00063 * Physically, this is what happens -- make sure your rr_graph does it. * 00064 * * 00065 * I assume a minimum width transistor takes 1 unit of area. A double-width * 00066 * transistor takes the twice the diffusion width, but the same spacing, so * 00067 * I assume it takes 1.5x the area of a minimum-width transitor. */ 00068 if(directionality == BI_DIRECTIONAL) 00069 { 00070 count_bidir_routing_transistors(num_switch, R_minW_nmos, 00071 R_minW_pmos); 00072 } 00073 else 00074 { 00075 assert(directionality == UNI_DIRECTIONAL); 00076 count_unidir_routing_transistors(segment_inf, R_minW_nmos, 00077 R_minW_pmos); 00078 } 00079 }
void count_unidir_routing_transistors | ( | t_segment_inf * | segment_inf, | |
float | R_minW_nmos, | |||
float | R_minW_pmos | |||
) | [static] |
Definition at line 349 of file rr_graph_area.c.
00352 { 00353 boolean *cblock_counted; /* [0..max(nx,ny)] -- 0th element unused. */ 00354 int *num_inputs_to_cblock; /* [0..num_rr_nodes-1], but all entries not */ 00355 00356 /* corresponding to IPINs will be 0. */ 00357 00358 t_rr_type from_rr_type, to_rr_type; 00359 int i, j, iseg, from_node, to_node, iedge, num_edges, maxlen; 00360 int max_inputs_to_cblock, cost_index, seg_type, switch_type; 00361 float input_cblock_trans; 00362 const float trans_sram_bit = 6.; 00363 00364 /* Two variables below are the accumulator variables that add up all the * 00365 * transistors in the routing. Make doubles so that they don't stop * 00366 * incrementing once adding a switch makes a change of less than 1 part in * 00367 * 10^7 to the total. If this still isn't good enough (adding 1 part in * 00368 * 10^15 will still be thrown away), compute the transistor count in * 00369 * "chunks", by adding up inodes 1 to 1000, 1001 to 2000 and then summing * 00370 * the partial sums together. */ 00371 00372 double ntrans; 00373 00374 00375 /* Buffers from the routing to the ipin cblock inputs, and from the ipin * 00376 * cblock outputs to the logic block, respectively. Assume minimum size n * 00377 * transistors, and ptransistors sized to make the pull-up R = pull-down R. */ 00378 00379 float trans_track_to_cblock_buf; 00380 float trans_cblock_to_lblock_buf; 00381 00382 max_inputs_to_cblock = 0; 00383 00384 /* Assume the two buffers below are 4x minimum drive strength (enough to * 00385 * drive a fanout of up to 16 pretty nicely -- should cover a reasonable * 00386 * wiring C plus the fanout. 
*/ 00387 00388 trans_track_to_cblock_buf = 00389 trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos); 00390 00391 trans_cblock_to_lblock_buf = 00392 trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos); 00393 00394 num_inputs_to_cblock = (int *)my_calloc(num_rr_nodes, sizeof(int)); 00395 maxlen = max(nx, ny) + 1; 00396 cblock_counted = (boolean *) my_calloc(maxlen, sizeof(boolean)); 00397 00398 ntrans = 0; 00399 for(from_node = 0; from_node < num_rr_nodes; from_node++) 00400 { 00401 00402 from_rr_type = rr_node[from_node].type; 00403 00404 switch (from_rr_type) 00405 { 00406 00407 case CHANX: 00408 case CHANY: 00409 num_edges = rr_node[from_node].num_edges; 00410 cost_index = rr_node[from_node].cost_index; 00411 seg_type = rr_indexed_data[cost_index].seg_index; 00412 switch_type = segment_inf[seg_type].wire_switch; 00413 assert(segment_inf[seg_type].wire_switch == 00414 segment_inf[seg_type].opin_switch); 00415 assert(switch_inf[switch_type].mux_trans_size >= 1); /* can't be smaller than min sized transistor */ 00416 00417 assert(rr_node[from_node].num_opin_drivers == 0); /* undir has no opin or wire switches */ 00418 assert(rr_node[from_node].num_wire_drivers == 0); /* undir has no opin or wire switches */ 00419 00420 /* Each wire segment begins with a multipexer followed by a driver for unidirectional */ 00421 /* Add up area of multiplexer */ 00422 ntrans += 00423 trans_per_mux(rr_node[from_node].fan_in, 00424 trans_sram_bit, 00425 switch_inf[switch_type].mux_trans_size); 00426 00427 /* Add up area of buffer */ 00428 if(switch_inf[switch_type].buf_size == 0) 00429 { 00430 ntrans += 00431 trans_per_buf(switch_inf[switch_type].R, 00432 R_minW_nmos, R_minW_pmos); 00433 } 00434 else 00435 { 00436 ntrans += switch_inf[switch_type].buf_size; 00437 } 00438 00439 for(iedge = 0; iedge < num_edges; iedge++) 00440 { 00441 00442 to_node = rr_node[from_node].edges[iedge]; 00443 to_rr_type = rr_node[to_node].type; 00444 00445 switch (to_rr_type) 00446 { 00447 00448 
case CHANX: 00449 case CHANY: 00450 break; 00451 00452 case IPIN: 00453 num_inputs_to_cblock[to_node]++; 00454 max_inputs_to_cblock = 00455 max(max_inputs_to_cblock, 00456 num_inputs_to_cblock[to_node]); 00457 iseg = 00458 seg_index_of_cblock(from_rr_type, 00459 to_node); 00460 00461 if(cblock_counted[iseg] == FALSE) 00462 { 00463 cblock_counted[iseg] = TRUE; 00464 ntrans += 00465 trans_track_to_cblock_buf; 00466 } 00467 break; 00468 00469 default: 00470 printf 00471 ("Error in count_routing_transistors: Unexpected \n" 00472 "connection from node %d (type %d) to node %d (type %d).\n", 00473 from_node, from_rr_type, to_node, 00474 to_rr_type); 00475 exit(1); 00476 break; 00477 00478 } /* End switch on to_rr_type. */ 00479 00480 } /* End for each edge. */ 00481 00482 /* Reset some flags */ 00483 if(from_rr_type == CHANX) 00484 { 00485 for(i = rr_node[from_node].xlow; 00486 i <= rr_node[from_node].xhigh; i++) 00487 cblock_counted[i] = FALSE; 00488 00489 } 00490 else 00491 { /* CHANY */ 00492 for(j = rr_node[from_node].ylow; 00493 j <= rr_node[from_node].yhigh; j++) 00494 cblock_counted[j] = FALSE; 00495 00496 } 00497 break; 00498 case OPIN: 00499 break; 00500 00501 default: 00502 break; 00503 00504 } /* End switch on from_rr_type */ 00505 } /* End for all nodes */ 00506 00507 /* Now add in the input connection block transistors. */ 00508 00509 input_cblock_trans = get_cblock_trans(num_inputs_to_cblock, 00510 max_inputs_to_cblock, 00511 trans_cblock_to_lblock_buf, 00512 trans_sram_bit); 00513 00514 free(cblock_counted); 00515 free(num_inputs_to_cblock); 00516 00517 ntrans += input_cblock_trans; 00518 00519 printf("\nRouting area (in minimum width transistor areas):\n"); 00520 printf("Total Routing Area: %#g Per logic tile: %#g\n", ntrans, 00521 ntrans / (float)(nx * ny)); 00522 }
static float get_cblock_trans | ( | int * | num_inputs_to_cblock, | |
int | max_inputs_to_cblock, | |||
float | trans_cblock_to_lblock_buf, | |||
float | trans_sram_bit | |||
) | [static] |
Definition at line 526 of file rr_graph_area.c.
00530 { 00531 00532 /* Computes the transistors in the input connection block multiplexers and * 00533 * the buffers from connection block outputs to the logic block input pins. * 00534 * For speed, I precompute the number of transistors in the multiplexers of * 00535 * interest. */ 00536 00537 float *trans_per_cblock; /* [0..max_inputs_to_cblock] */ 00538 float trans_count; 00539 int i, num_inputs; 00540 00541 trans_per_cblock = (float *)my_malloc((max_inputs_to_cblock + 1) * 00542 sizeof(float)); 00543 00544 trans_per_cblock[0] = 0.; /* i.e., not an IPIN or no inputs */ 00545 00546 /* With one or more inputs, add the mux and output buffer. I add the output * 00547 * buffer even when the number of inputs = 1 (i.e. no mux) because I assume * 00548 * I need the drivability just for metal capacitance. */ 00549 00550 for(i = 1; i <= max_inputs_to_cblock; i++) 00551 trans_per_cblock[i] = 00552 trans_per_mux(i, trans_sram_bit, 00553 ipin_mux_trans_size) + trans_cblock_to_lblock_buf; 00554 00555 trans_count = 0.; 00556 00557 for(i = 0; i < num_rr_nodes; i++) 00558 { 00559 num_inputs = num_inputs_to_cblock[i]; 00560 trans_count += trans_per_cblock[num_inputs]; 00561 } 00562 00563 free(trans_per_cblock); 00564 return (trans_count); 00565 }
/* Returns the area, in minimum-width transistor equivalents, of a buffer
 * with output resistance Rbuf.  Pull-up and pull-down are given equal
 * strength, so every stage is charged one NMOS and one PMOS transistor of
 * the same resistance.
 *
 * Rbuf         target resistance of the buffer
 * R_minW_nmos  resistance of a minimum-width NMOS transistor
 * R_minW_pmos  resistance of a minimum-width PMOS transistor
 *
 * A weak buffer (Rbuf above 0.6 * R_minW_nmos) or a non-positive Rbuf is
 * built as a single stage.  Anything stronger becomes a chain: one
 * minimum-size stage followed by num_stage larger ones, with the stage
 * count chosen for a stage ratio close to 4. */
static float
trans_per_buf(float Rbuf, float R_minW_nmos, float R_minW_pmos)
{
    int num_stage, stages_left;
    float area, stage_ratio, stage_R;

    /* Single-stage buffer. */
    if (Rbuf <= 0. || Rbuf > 0.6 * R_minW_nmos) {
        return trans_per_R(Rbuf, R_minW_nmos) +
               trans_per_R(Rbuf, R_minW_pmos);
    }

    /* Multi-stage buffer: number of size-4 steps between minimum size and
     * the requested strength, at least one. */
    num_stage = nint(log10(R_minW_nmos / Rbuf) / log10(4.));
    num_stage = max(num_stage, 1);
    stage_ratio = pow(R_minW_nmos / Rbuf, 1. / (float)num_stage);

    area = 0.;
    stage_R = R_minW_nmos;
    stages_left = num_stage + 1;    /* the minimum-size stage plus num_stage */

    while (stages_left > 0) {
        area += trans_per_R(stage_R, R_minW_nmos) +
                trans_per_R(stage_R, R_minW_pmos);
        stage_R /= stage_ratio;
        stages_left--;
    }

    return area;
}
/* Returns the transistor area of a pass-transistor multiplexer with
 * num_inputs inputs.  Input buffers and any output buffer are NOT included.
 *
 * num_inputs       number of multiplexer inputs
 * trans_sram_bit   area of one SRAM configuration bit
 * pass_trans_area  area of one pass transistor
 *
 * Sizing rules:
 *   - 0 or 1 input:  no multiplexer, area 0.
 *   - 2 inputs:      2 pass transistors driven by 1 SRAM bit.
 *   - 3 or 4 inputs: one level, one-hot: 1 pass transistor and 1 SRAM bit
 *                    per input.
 *   - 5 or more:     two levels.  The second level has k = floor(sqrt(n))
 *                    pass transistors; SRAM bits are ceil(n / k) for the
 *                    first level plus k for the second, except that a
 *                    2-wide second level needs only 1 bit instead of 2.
 *
 * k and ceil(n / k) are computed in integer arithmetic so they are exact
 * for every int.  Going through float would round num_inputs above 2^24
 * and could lose a first-level SRAM bit. */
static float
trans_per_mux(int num_inputs, float trans_sram_bit, float pass_trans_area)
{
    float pass_trans, sram_trans;
    int second_stage_width, first_stage_width;

    if (num_inputs <= 1) {
        return 0.f;
    }

    if (num_inputs == 2) {
        pass_trans = 2 * pass_trans_area;
        sram_trans = 1 * trans_sram_bit;
    } else if (num_inputs <= 4) {
        /* One-hot encoding */
        pass_trans = num_inputs * pass_trans_area;
        sram_trans = num_inputs * trans_sram_bit;
    } else {
        /* floor(sqrt(num_inputs)); num_inputs >= 5 so the answer is >= 2.
         * The test (k+1) <= n / (k+1) is (k+1)^2 <= n without overflow. */
        second_stage_width = 2;
        while (second_stage_width + 1 <=
               num_inputs / (second_stage_width + 1)) {
            second_stage_width++;
        }

        /* ceil(num_inputs / second_stage_width) */
        first_stage_width = num_inputs / second_stage_width +
                            (num_inputs % second_stage_width != 0);

        pass_trans = (num_inputs + second_stage_width) * pass_trans_area;
        sram_trans =
            (first_stage_width + second_stage_width) * trans_sram_bit;

        if (second_stage_width == 2) {
            /* A 2-wide second level is selected by a single bit, so take
             * back one of the two one-hot bits counted above. */
            sram_trans -= 1 * trans_sram_bit;
        }
    }

    return pass_trans + sram_trans;
}
/* Returns the area, in minimum-width transistor equivalents, of a
 * transistor with resistance Rtrans, given that a minimum-width transistor
 * of the same type has resistance R_minW_trans.
 *
 * Rtrans        resistance the transistor must have
 * R_minW_trans  resistance of a minimum-width transistor of this type
 *
 * A transistor no stronger than minimum width costs 1.  A non-positive or
 * NaN Rtrans is treated as a nonsense value and also costs 1, so a bad
 * resistance cannot turn the whole area total into NaN.
 *
 * Otherwise: area = 1 for the minimum width plus 0.5 for each additional
 * unit of width.  The 50% factor reflects the overlap of horizontally
 * paralleled transistors and the need for only one spacing instead of two
 * (two minimum-width transistors need two spaces; one 2W transistor needs
 * only one). */
static float
trans_per_R(float Rtrans, float R_minW_trans)
{
    /* Written as negated comparisons so that NaN, for which every ordered
     * comparison is false, takes the minimum-width path too. */
    if (!(Rtrans > 0.)) {
        return 1.f;
    }

    if (!(Rtrans < R_minW_trans)) {
        return 1.f;
    }

    return (float)(0.5 * R_minW_trans / Rtrans + 0.5);
}