00001 #include <math.h>
00002 #include "util.h"
00003 #include "vpr_types.h"
00004 #include <assert.h>
00005 #include "globals.h"
00006 #include "rr_graph_util.h"
00007 #include "rr_graph_area.h"
00008
00009
00010
00011
00012 static void count_bidir_routing_transistors(int num_switch,
00013 float R_minW_nmos,
00014 float R_minW_pmos);
00015
00016 static void count_unidir_routing_transistors(t_segment_inf * segment_inf,
00017 float R_minW_nmos,
00018 float R_minW_pmos);
00019
00020 static float get_cblock_trans(int *num_inputs_to_cblock,
00021 int max_inputs_to_cblock,
00022 float trans_cblock_to_lblock_buf,
00023 float trans_sram_bit);
00024
00025 static float *alloc_and_load_unsharable_switch_trans(int num_switch,
00026 float trans_sram_bit,
00027 float R_minW_nmos);
00028
00029 static float *alloc_and_load_sharable_switch_trans(int num_switch,
00030 float trans_sram_bit,
00031 float R_minW_nmos,
00032 float R_minW_pmos);
00033
00034 static float trans_per_buf(float Rbuf,
00035 float R_minW_nmos,
00036 float R_minW_pmos);
00037
00038 static float trans_per_mux(int num_inputs,
00039 float trans_sram_bit,
00040 float pass_trans_area);
00041
00042 static float trans_per_R(float Rtrans,
00043 float R_minW_trans);
00044
00045
00046
00047
00048
00049 void
00050 count_routing_transistors(enum e_directionality directionality,
00051 int num_switch,
00052 t_segment_inf * segment_inf,
00053 float R_minW_nmos,
00054 float R_minW_pmos)
00055 {
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068 if(directionality == BI_DIRECTIONAL)
00069 {
00070 count_bidir_routing_transistors(num_switch, R_minW_nmos,
00071 R_minW_pmos);
00072 }
00073 else
00074 {
00075 assert(directionality == UNI_DIRECTIONAL);
00076 count_unidir_routing_transistors(segment_inf, R_minW_nmos,
00077 R_minW_pmos);
00078 }
00079 }
00080
00081 void
00082 count_bidir_routing_transistors(int num_switch,
00083 float R_minW_nmos,
00084 float R_minW_pmos)
00085 {
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109 int *num_inputs_to_cblock;
00110
00111
00112
00113 boolean *cblock_counted;
00114 float *shared_buffer_trans;
00115 float *unsharable_switch_trans, *sharable_switch_trans;
00116
00117 t_rr_type from_rr_type, to_rr_type;
00118 int from_node, to_node, iedge, num_edges, maxlen;
00119 int iswitch, i, j, iseg, max_inputs_to_cblock;
00120 float input_cblock_trans, shared_opin_buffer_trans;
00121 const float trans_sram_bit = 6.;
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131 double ntrans_sharing, ntrans_no_sharing;
00132
00133
00134
00135
00136
00137
00138 float trans_track_to_cblock_buf;
00139 float trans_cblock_to_lblock_buf;
00140
00141
00142 ntrans_sharing = 0.;
00143 ntrans_no_sharing = 0.;
00144 max_inputs_to_cblock = 0;
00145
00146
00147
00148
00149
00150 trans_track_to_cblock_buf =
00151 trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos);
00152
00153 trans_cblock_to_lblock_buf =
00154 trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos);
00155
00156 num_inputs_to_cblock = (int *)my_calloc(num_rr_nodes, sizeof(int));
00157
00158 maxlen = max(nx, ny) + 1;
00159 cblock_counted = (boolean *) my_calloc(maxlen, sizeof(boolean));
00160 shared_buffer_trans = (float *)my_calloc(maxlen, sizeof(float));
00161
00162 unsharable_switch_trans =
00163 alloc_and_load_unsharable_switch_trans(num_switch, trans_sram_bit,
00164 R_minW_nmos);
00165
00166 sharable_switch_trans =
00167 alloc_and_load_sharable_switch_trans(num_switch, trans_sram_bit,
00168 R_minW_nmos, R_minW_pmos);
00169
00170 for(from_node = 0; from_node < num_rr_nodes; from_node++)
00171 {
00172
00173 from_rr_type = rr_node[from_node].type;
00174
00175 switch (from_rr_type)
00176 {
00177
00178 case CHANX:
00179 case CHANY:
00180 num_edges = rr_node[from_node].num_edges;
00181
00182 for(iedge = 0; iedge < num_edges; iedge++)
00183 {
00184
00185 to_node = rr_node[from_node].edges[iedge];
00186 to_rr_type = rr_node[to_node].type;
00187
00188 switch (to_rr_type)
00189 {
00190
00191 case CHANX:
00192 case CHANY:
00193 iswitch =
00194 rr_node[from_node].switches[iedge];
00195
00196 if(switch_inf[iswitch].buffered)
00197 {
00198 iseg =
00199 seg_index_of_sblock(from_node,
00200 to_node);
00201 shared_buffer_trans[iseg] =
00202 max(shared_buffer_trans[iseg],
00203 sharable_switch_trans
00204 [iswitch]);
00205
00206 ntrans_no_sharing +=
00207 unsharable_switch_trans
00208 [iswitch] +
00209 sharable_switch_trans
00210 [iswitch];
00211 ntrans_sharing +=
00212 unsharable_switch_trans
00213 [iswitch];
00214 }
00215 else if(from_node < to_node)
00216 {
00217
00218
00219
00220
00221 ntrans_no_sharing +=
00222 unsharable_switch_trans
00223 [iswitch];
00224 ntrans_sharing +=
00225 unsharable_switch_trans
00226 [iswitch];
00227 }
00228 break;
00229
00230 case IPIN:
00231 num_inputs_to_cblock[to_node]++;
00232 max_inputs_to_cblock =
00233 max(max_inputs_to_cblock,
00234 num_inputs_to_cblock[to_node]);
00235
00236 iseg =
00237 seg_index_of_cblock(from_rr_type,
00238 to_node);
00239
00240 if(cblock_counted[iseg] == FALSE)
00241 {
00242 cblock_counted[iseg] = TRUE;
00243 ntrans_sharing +=
00244 trans_track_to_cblock_buf;
00245 ntrans_no_sharing +=
00246 trans_track_to_cblock_buf;
00247 }
00248 break;
00249
00250 default:
00251 printf
00252 ("Error in count_routing_transistors: Unexpected \n"
00253 "connection from node %d (type %d) to node %d (type %d).\n",
00254 from_node, from_rr_type, to_node,
00255 to_rr_type);
00256 exit(1);
00257 break;
00258
00259 }
00260
00261 }
00262
00263
00264
00265 if(from_rr_type == CHANX)
00266 {
00267 for(i = rr_node[from_node].xlow - 1;
00268 i <= rr_node[from_node].xhigh; i++)
00269 {
00270 ntrans_sharing += shared_buffer_trans[i];
00271 shared_buffer_trans[i] = 0.;
00272 }
00273
00274 for(i = rr_node[from_node].xlow;
00275 i <= rr_node[from_node].xhigh; i++)
00276 cblock_counted[i] = FALSE;
00277
00278 }
00279 else
00280 {
00281 for(j = rr_node[from_node].ylow - 1;
00282 j <= rr_node[from_node].yhigh; j++)
00283 {
00284 ntrans_sharing += shared_buffer_trans[j];
00285 shared_buffer_trans[j] = 0.;
00286 }
00287
00288 for(j = rr_node[from_node].ylow;
00289 j <= rr_node[from_node].yhigh; j++)
00290 cblock_counted[j] = FALSE;
00291
00292 }
00293 break;
00294
00295 case OPIN:
00296 num_edges = rr_node[from_node].num_edges;
00297 shared_opin_buffer_trans = 0.;
00298
00299 for(iedge = 0; iedge < num_edges; iedge++)
00300 {
00301 iswitch = rr_node[from_node].switches[iedge];
00302 ntrans_no_sharing +=
00303 unsharable_switch_trans[iswitch] +
00304 sharable_switch_trans[iswitch];
00305 ntrans_sharing +=
00306 unsharable_switch_trans[iswitch];
00307
00308 shared_opin_buffer_trans =
00309 max(shared_opin_buffer_trans,
00310 sharable_switch_trans[iswitch]);
00311 }
00312
00313 ntrans_sharing += shared_opin_buffer_trans;
00314 break;
00315
00316 default:
00317 break;
00318
00319 }
00320 }
00321
00322 free(cblock_counted);
00323 free(shared_buffer_trans);
00324 free(unsharable_switch_trans);
00325 free(sharable_switch_trans);
00326
00327
00328
00329 input_cblock_trans = get_cblock_trans(num_inputs_to_cblock,
00330 max_inputs_to_cblock,
00331 trans_cblock_to_lblock_buf,
00332 trans_sram_bit);
00333
00334 free(num_inputs_to_cblock);
00335
00336 ntrans_sharing += input_cblock_trans;
00337 ntrans_no_sharing += input_cblock_trans;
00338
00339 printf("\nRouting area (in minimum width transistor areas):\n");
00340 printf
00341 ("Assuming no buffer sharing (pessimistic). Total: %#g Per logic tile: "
00342 "%#g\n", ntrans_no_sharing, ntrans_no_sharing / (float)(nx * ny));
00343 printf
00344 ("Assuming buffer sharing (slightly optimistic). Total: %#g Per logic tile: "
00345 "%#g\n\n", ntrans_sharing, ntrans_sharing / (float)(nx * ny));
00346 }
00347
00348 void
00349 count_unidir_routing_transistors(t_segment_inf * segment_inf,
00350 float R_minW_nmos,
00351 float R_minW_pmos)
00352 {
00353 boolean *cblock_counted;
00354 int *num_inputs_to_cblock;
00355
00356
00357
00358 t_rr_type from_rr_type, to_rr_type;
00359 int i, j, iseg, from_node, to_node, iedge, num_edges, maxlen;
00360 int max_inputs_to_cblock, cost_index, seg_type, switch_type;
00361 float input_cblock_trans;
00362 const float trans_sram_bit = 6.;
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372 double ntrans;
00373
00374
00375
00376
00377
00378
00379 float trans_track_to_cblock_buf;
00380 float trans_cblock_to_lblock_buf;
00381
00382 max_inputs_to_cblock = 0;
00383
00384
00385
00386
00387
00388 trans_track_to_cblock_buf =
00389 trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos);
00390
00391 trans_cblock_to_lblock_buf =
00392 trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos);
00393
00394 num_inputs_to_cblock = (int *)my_calloc(num_rr_nodes, sizeof(int));
00395 maxlen = max(nx, ny) + 1;
00396 cblock_counted = (boolean *) my_calloc(maxlen, sizeof(boolean));
00397
00398 ntrans = 0;
00399 for(from_node = 0; from_node < num_rr_nodes; from_node++)
00400 {
00401
00402 from_rr_type = rr_node[from_node].type;
00403
00404 switch (from_rr_type)
00405 {
00406
00407 case CHANX:
00408 case CHANY:
00409 num_edges = rr_node[from_node].num_edges;
00410 cost_index = rr_node[from_node].cost_index;
00411 seg_type = rr_indexed_data[cost_index].seg_index;
00412 switch_type = segment_inf[seg_type].wire_switch;
00413 assert(segment_inf[seg_type].wire_switch ==
00414 segment_inf[seg_type].opin_switch);
00415 assert(switch_inf[switch_type].mux_trans_size >= 1);
00416
00417 assert(rr_node[from_node].num_opin_drivers == 0);
00418 assert(rr_node[from_node].num_wire_drivers == 0);
00419
00420
00421
00422 ntrans +=
00423 trans_per_mux(rr_node[from_node].fan_in,
00424 trans_sram_bit,
00425 switch_inf[switch_type].mux_trans_size);
00426
00427
00428 if(switch_inf[switch_type].buf_size == 0)
00429 {
00430 ntrans +=
00431 trans_per_buf(switch_inf[switch_type].R,
00432 R_minW_nmos, R_minW_pmos);
00433 }
00434 else
00435 {
00436 ntrans += switch_inf[switch_type].buf_size;
00437 }
00438
00439 for(iedge = 0; iedge < num_edges; iedge++)
00440 {
00441
00442 to_node = rr_node[from_node].edges[iedge];
00443 to_rr_type = rr_node[to_node].type;
00444
00445 switch (to_rr_type)
00446 {
00447
00448 case CHANX:
00449 case CHANY:
00450 break;
00451
00452 case IPIN:
00453 num_inputs_to_cblock[to_node]++;
00454 max_inputs_to_cblock =
00455 max(max_inputs_to_cblock,
00456 num_inputs_to_cblock[to_node]);
00457 iseg =
00458 seg_index_of_cblock(from_rr_type,
00459 to_node);
00460
00461 if(cblock_counted[iseg] == FALSE)
00462 {
00463 cblock_counted[iseg] = TRUE;
00464 ntrans +=
00465 trans_track_to_cblock_buf;
00466 }
00467 break;
00468
00469 default:
00470 printf
00471 ("Error in count_routing_transistors: Unexpected \n"
00472 "connection from node %d (type %d) to node %d (type %d).\n",
00473 from_node, from_rr_type, to_node,
00474 to_rr_type);
00475 exit(1);
00476 break;
00477
00478 }
00479
00480 }
00481
00482
00483 if(from_rr_type == CHANX)
00484 {
00485 for(i = rr_node[from_node].xlow;
00486 i <= rr_node[from_node].xhigh; i++)
00487 cblock_counted[i] = FALSE;
00488
00489 }
00490 else
00491 {
00492 for(j = rr_node[from_node].ylow;
00493 j <= rr_node[from_node].yhigh; j++)
00494 cblock_counted[j] = FALSE;
00495
00496 }
00497 break;
00498 case OPIN:
00499 break;
00500
00501 default:
00502 break;
00503
00504 }
00505 }
00506
00507
00508
00509 input_cblock_trans = get_cblock_trans(num_inputs_to_cblock,
00510 max_inputs_to_cblock,
00511 trans_cblock_to_lblock_buf,
00512 trans_sram_bit);
00513
00514 free(cblock_counted);
00515 free(num_inputs_to_cblock);
00516
00517 ntrans += input_cblock_trans;
00518
00519 printf("\nRouting area (in minimum width transistor areas):\n");
00520 printf("Total Routing Area: %#g Per logic tile: %#g\n", ntrans,
00521 ntrans / (float)(nx * ny));
00522 }
00523
00524
00525 static float
00526 get_cblock_trans(int *num_inputs_to_cblock,
00527 int max_inputs_to_cblock,
00528 float trans_cblock_to_lblock_buf,
00529 float trans_sram_bit)
00530 {
00531
00532
00533
00534
00535
00536
00537 float *trans_per_cblock;
00538 float trans_count;
00539 int i, num_inputs;
00540
00541 trans_per_cblock = (float *)my_malloc((max_inputs_to_cblock + 1) *
00542 sizeof(float));
00543
00544 trans_per_cblock[0] = 0.;
00545
00546
00547
00548
00549
00550 for(i = 1; i <= max_inputs_to_cblock; i++)
00551 trans_per_cblock[i] =
00552 trans_per_mux(i, trans_sram_bit,
00553 ipin_mux_trans_size) + trans_cblock_to_lblock_buf;
00554
00555 trans_count = 0.;
00556
00557 for(i = 0; i < num_rr_nodes; i++)
00558 {
00559 num_inputs = num_inputs_to_cblock[i];
00560 trans_count += trans_per_cblock[num_inputs];
00561 }
00562
00563 free(trans_per_cblock);
00564 return (trans_count);
00565 }
00566
00567
00568 static float *
00569 alloc_and_load_unsharable_switch_trans(int num_switch,
00570 float trans_sram_bit,
00571 float R_minW_nmos)
00572 {
00573
00574
00575
00576
00577
00578
00579 float *unsharable_switch_trans, Rpass;
00580 int i;
00581
00582 unsharable_switch_trans = (float *)my_malloc(num_switch * sizeof(float));
00583
00584 for(i = 0; i < num_switch; i++)
00585 {
00586
00587 if(switch_inf[i].buffered == FALSE)
00588 {
00589 Rpass = switch_inf[i].R;
00590 }
00591 else
00592 {
00593 Rpass = switch_inf[i].R / 2.;
00594 }
00595
00596 unsharable_switch_trans[i] = trans_per_R(Rpass, R_minW_nmos) +
00597 trans_sram_bit;
00598 }
00599
00600 return (unsharable_switch_trans);
00601 }
00602
00603
00604 static float *
00605 alloc_and_load_sharable_switch_trans(int num_switch,
00606 float trans_sram_bit,
00607 float R_minW_nmos,
00608 float R_minW_pmos)
00609 {
00610
00611
00612
00613
00614
00615
00616
00617 float *sharable_switch_trans, Rbuf;
00618 int i;
00619
00620 sharable_switch_trans = (float *)my_malloc(num_switch * sizeof(float));
00621
00622 for(i = 0; i < num_switch; i++)
00623 {
00624
00625 if(switch_inf[i].buffered == FALSE)
00626 {
00627 sharable_switch_trans[i] = 0.;
00628 }
00629 else
00630 {
00631 Rbuf = switch_inf[i].R / 2.;
00632 sharable_switch_trans[i] =
00633 trans_per_buf(Rbuf, R_minW_nmos, R_minW_pmos);
00634 }
00635 }
00636
00637 return (sharable_switch_trans);
00638 }
00639
00640
/* Returns the area of a buffer whose final stage has resistance Rbuf.
 *
 *   Rbuf         target output resistance of the buffer.
 *   R_minW_nmos,
 *   R_minW_pmos  reference resistances passed to trans_per_R() for the
 *                two transistors of each inverter.
 *
 * When Rbuf is non-positive or larger than 0.6 * R_minW_nmos, a single
 * inverter sized for Rbuf is used.  Otherwise the buffer is a chain of
 * num_stage + 1 inverters: the first has resistance R_minW_nmos and each
 * following one divides the resistance by a constant ratio so that the
 * last reaches Rbuf.  num_stage is chosen as the rounded base-4 logarithm
 * of R_minW_nmos / Rbuf, with a minimum of one. */
static float
trans_per_buf(float Rbuf,
              float R_minW_nmos,
              float R_minW_pmos)
{
    int num_stage, stages_left;
    float total, stage_ratio, Rstage;

    /* Weak (or unspecified) buffer: one inverter is enough. */
    if(Rbuf > 0.6 * R_minW_nmos || Rbuf <= 0.)
    {
        total = trans_per_R(Rbuf, R_minW_nmos) + trans_per_R(Rbuf,
                                                             R_minW_pmos);
        return (total);
    }

    /* Aim for a per-stage ratio of about 4, then compute the exact ratio
     * that hits Rbuf with the chosen number of stages. */
    num_stage = nint(log10(R_minW_nmos / Rbuf) / log10(4.));
    num_stage = max(num_stage, 1);
    stage_ratio = pow(R_minW_nmos / Rbuf, 1. / (float)num_stage);

    total = 0.;
    Rstage = R_minW_nmos;

    /* One reference-resistance inverter followed by num_stage larger ones. */
    for(stages_left = num_stage + 1; stages_left > 0; stages_left--)
    {
        total += trans_per_R(Rstage, R_minW_nmos) + trans_per_R(Rstage,
                                                                R_minW_pmos);
        Rstage /= stage_ratio;
    }

    return (total);
}
00683
00684
00685 static float
00686 trans_per_mux(int num_inputs,
00687 float trans_sram_bit,
00688 float pass_trans_area)
00689 {
00690
00691
00692
00693
00694
00695
00696 float ntrans, sram_trans, pass_trans;
00697 int num_second_stage_trans;
00698
00699 if(num_inputs <= 1)
00700 {
00701 return (0);
00702 }
00703 else if(num_inputs == 2)
00704 {
00705 pass_trans = 2 * pass_trans_area;
00706 sram_trans = 1 * trans_sram_bit;
00707 }
00708 else if(num_inputs <= 4)
00709 {
00710
00711 pass_trans = num_inputs * pass_trans_area;
00712 sram_trans = num_inputs * trans_sram_bit;
00713 }
00714 else
00715 {
00716
00717
00718
00719 num_second_stage_trans = floor(sqrt(num_inputs) + 0.00001);
00720 pass_trans =
00721 (num_inputs + num_second_stage_trans) * pass_trans_area;
00722 sram_trans =
00723 (ceil((float)num_inputs / num_second_stage_trans - 0.00001) +
00724 num_second_stage_trans) * trans_sram_bit;
00725 if(num_second_stage_trans == 2)
00726 {
00727
00728
00729 sram_trans -= 1 * trans_sram_bit;
00730 }
00731 }
00732
00733 ntrans = pass_trans + sram_trans;
00734 return (ntrans);
00735 }
00736
00737
/* Returns the area, in minimum width transistor areas, of one transistor
 * with resistance Rtrans.
 *
 *   Rtrans        resistance of the transistor being sized.
 *   R_minW_trans  resistance of a minimum width transistor of that type.
 *
 * A non-positive Rtrans, or one that a minimum width transistor already
 * meets (Rtrans >= R_minW_trans), costs exactly 1.  Otherwise the area is
 * 0.5 * R_minW_trans / Rtrans + 0.5. */
static float
trans_per_R(float Rtrans,
            float R_minW_trans)
{
    if(Rtrans <= 0. || Rtrans >= R_minW_trans)
        return (1.);

    return (0.5 * R_minW_trans / Rtrans + 0.5);
}