|
VPR-6.0
|
This graph shows which files directly or indirectly include this file:Go to the source code of this file.
Functions | |
| void | alloc_and_load_cluster_legality_checker () |
| void | alloc_and_load_legalizer_for_cluster (INP t_block *clb, INP int clb_index, INP const t_arch *arch) |
| void | free_legalizer_for_cluster (INP t_block *clb) |
| void | free_cluster_legality_checker () |
| void | reset_legalizer_for_cluster (t_block *clb) |
| enum e_block_pack_status | try_add_block_to_current_cluster_and_primitive (INP int logical_block, INP t_pb *primitive) |
| void | save_cluster_solution () |
| boolean | is_pin_open (int i) |
| void | set_pb_mode (t_pb *pb, int mode, int isOn) |
| void | alloc_and_load_rr_graph_for_pb_graph_node (INP t_pb_graph_node *pb_graph_node, INP const t_arch *arch, int mode) |
| void alloc_and_load_cluster_legality_checker | ( | ) |
Legalizes routing for a cluster
Definition at line 218 of file cluster_legality.c.
{
best_routing = (struct s_trace **)my_calloc(num_logical_nets, sizeof(struct s_trace *));
nets_in_cluster = my_malloc(num_logical_nets * sizeof(int));
num_nets_in_cluster = 0;
num_nets = num_logical_nets;
/* inside a cluster, I do not consider rr_indexed_data cost, set to 1 since other costs are multiplied by it */
num_rr_indexed_data = 1;
rr_indexed_data = my_calloc(1, sizeof(t_rr_indexed_data));
rr_indexed_data[0].base_cost = 1;
/* alloc routing structures */
alloc_route_static_structs();
alloc_net_rr_terminals_cluster();
}
Here is the call graph for this function:
Here is the caller graph for this function:| void alloc_and_load_legalizer_for_cluster | ( | INP t_block * | clb, |
| INP int | clb_index, | ||
| INP const t_arch * | arch | ||
| ) |
Structure: Model external routing and internal routing
1. Model external routing num input pins == num external sources for input pins, fully connect them to input pins (simulates external routing) num output pins == num external sinks for output pins, fully connect them to output pins (simulates external routing) num clock pins == num external sources for clock pins, fully connect them to clock pins (simulates external routing) 2. Model internal routing
Definition at line 372 of file cluster_legality.c.
{
/* make each rr_node one correspond with pin and correspond with pin's index pin_count_in_cluster */
int i, j, k, m, index, pb_graph_rr_index;
int count_pins;
t_pb_type * pb_type;
t_pb_graph_node *pb_graph_node;
int ipin;
/* Create rr_graph */
pb_type = clb->type->pb_type;
pb_graph_node = clb->type->pb_graph_head;
num_rr_nodes = pb_graph_node->total_pb_pins + pb_type->num_input_pins +
pb_type->num_output_pins + pb_type->num_clock_pins;
if(num_rr_nodes > num_rr_intrinsic_cost) {
free(rr_intrinsic_cost);
rr_intrinsic_cost = my_calloc(num_rr_nodes, sizeof(float));
num_rr_intrinsic_cost = num_rr_nodes;
}
rr_node = my_calloc(num_rr_nodes, sizeof(t_rr_node));
clb->pb->rr_graph = rr_node;
alloc_and_load_rr_graph_for_pb_graph_node(pb_graph_node, arch, 0);
curr_cluster_index = clb_index;
/* Alloc and load rr_graph external sources and sinks */
ext_input_rr_node_index = pb_graph_node->total_pb_pins;
ext_output_rr_node_index = pb_type->num_input_pins + pb_graph_node->total_pb_pins;
ext_clock_rr_node_index = pb_type->num_input_pins + pb_type->num_output_pins + pb_graph_node->total_pb_pins;
max_ext_index = pb_type->num_input_pins + pb_type->num_output_pins + pb_type->num_clock_pins + pb_graph_node->total_pb_pins;
for(i = 0; i < pb_type->num_input_pins; i++) {
index = i + pb_graph_node->total_pb_pins;
rr_node[index].type = SOURCE;
rr_node[index].fan_in = 0;
rr_node[index].num_edges = pb_type->num_input_pins;
rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int));
rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int));
rr_node[index].capacity = 1;
rr_intrinsic_cost[index] = 0;
}
for(i = 0; i < pb_type->num_output_pins; i++) {
index = i + pb_type->num_input_pins + pb_graph_node->total_pb_pins;
rr_node[index].type = SINK;
rr_node[index].fan_in = pb_type->num_output_pins;
rr_node[index].num_edges = 0;
rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
rr_node[index].capacity = 1;
rr_intrinsic_cost[index] = 0;
}
for(i = 0; i < pb_type->num_clock_pins; i++) {
index = i + pb_type->num_input_pins + pb_type->num_output_pins + pb_graph_node->total_pb_pins;
rr_node[index].type = SOURCE;
rr_node[index].fan_in = 0;
rr_node[index].num_edges = pb_type->num_clock_pins;
rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int));
rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int));
rr_node[index].capacity = 1;
rr_intrinsic_cost[index] = 0;
}
ipin = 0;
for(i = 0; i < pb_graph_node->num_input_ports; i++) {
for(j = 0; j < pb_graph_node->num_input_pins[i]; j++) {
pb_graph_rr_index = pb_graph_node->input_pins[i][j].pin_count_in_cluster;
for(k = 0; k < pb_type->num_input_pins; k++) {
index = k + pb_graph_node->total_pb_pins;
rr_node[index].edges[ipin] = pb_graph_rr_index;
rr_node[index].switches[ipin] = arch->num_switches - 1;
}
rr_node[pb_graph_rr_index].pack_intrinsic_cost = MAX_SHORT; /* using an input pin should be made costly */
ipin++;
}
}
/* Must attach output pins to input pins because if a connection cannot fit using intra-cluster routing, it can also use external routing */
for(i = 0; i < pb_graph_node->num_output_ports; i++) {
for(j = 0; j < pb_graph_node->num_output_pins[i]; j++) {
count_pins = pb_graph_node->output_pins[i][j].num_output_edges + pb_type->num_output_pins + pb_type->num_input_pins;
pb_graph_rr_index = pb_graph_node->output_pins[i][j].pin_count_in_cluster;
rr_node[pb_graph_rr_index].edges = my_realloc(rr_node[pb_graph_rr_index].edges,
(count_pins) * sizeof(int));
rr_node[pb_graph_rr_index].switches = my_realloc(rr_node[pb_graph_rr_index].switches,
(count_pins) * sizeof(int));
ipin = 0;
for(k = 0; k < pb_graph_node->num_input_ports; k++) {
for(m = 0; m < pb_graph_node->num_input_pins[k]; m++) {
index = pb_graph_node->input_pins[k][m].pin_count_in_cluster;
rr_node[pb_graph_rr_index].edges[ipin + pb_graph_node->output_pins[i][j].num_output_edges] = index;
rr_node[pb_graph_rr_index].switches[ipin + pb_graph_node->output_pins[i][j].num_output_edges] = arch->num_switches - 1;
ipin++;
}
}
for(k = 0; k < pb_type->num_output_pins; k++) {
index = k + pb_type->num_input_pins + pb_graph_node->total_pb_pins;
rr_node[pb_graph_rr_index].edges[k + pb_type->num_input_pins + pb_graph_node->output_pins[i][j].num_output_edges] = index;
rr_node[pb_graph_rr_index].switches[k + pb_type->num_input_pins + pb_graph_node->output_pins[i][j].num_output_edges] = arch->num_switches - 1;
}
rr_node[pb_graph_rr_index].num_edges += pb_type->num_output_pins + pb_type->num_input_pins;
rr_node[pb_graph_rr_index].pack_intrinsic_cost = 1 + (float)rr_node[pb_graph_rr_index].num_edges / 5; /* need to normalize better than 5 */
}
}
ipin = 0;
for(i = 0; i < pb_graph_node->num_clock_ports; i++) {
for(j = 0; j < pb_graph_node->num_clock_pins[i]; j++) {
for(k = 0; k < pb_type->num_clock_pins; k++) {
index = k + pb_type->num_input_pins + pb_type->num_output_pins + pb_graph_node->total_pb_pins;
pb_graph_rr_index = pb_graph_node->clock_pins[i][j].pin_count_in_cluster;
rr_node[index].edges[ipin] = pb_graph_rr_index;
rr_node[index].switches[ipin] = arch->num_switches - 1;
}
ipin++;
}
}
alloc_and_load_rr_node_route_structs();
num_nets_in_cluster = 0;
}
Here is the call graph for this function:
Here is the caller graph for this function:| void alloc_and_load_rr_graph_for_pb_graph_node | ( | INP t_pb_graph_node * | pb_graph_node, |
| INP const t_arch * | arch, | ||
| int | mode | ||
| ) |
Definition at line 254 of file cluster_legality.c.
{
int i, j, k, index;
boolean is_primitive;
is_primitive = (pb_graph_node->pb_type->num_modes == 0);
for(i = 0; i < pb_graph_node->num_input_ports; i++) {
for(j = 0; j < pb_graph_node->num_input_pins[i]; j++) {
index = pb_graph_node->input_pins[i][j].pin_count_in_cluster;
rr_node[index].pb_graph_pin = &pb_graph_node->input_pins[i][j];
rr_node[index].fan_in = pb_graph_node->input_pins[i][j].num_input_edges;
rr_node[index].num_edges = pb_graph_node->input_pins[i][j].num_output_edges;
rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int));
rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int));
rr_node[index].net_num = OPEN;
rr_node[index].prev_node = OPEN;
rr_node[index].prev_edge = OPEN;
if(mode == 0) { /* default mode is the first mode */
rr_node[index].capacity = 1;
} else {
rr_node[index].capacity = 0;
}
for(k = 0; k < pb_graph_node->input_pins[i][j].num_output_edges; k++) {
/* TODO: Intention was to do bus-based implementation here */
rr_node[index].edges[k] = pb_graph_node->input_pins[i][j].output_edges[k]->output_pins[0]->pin_count_in_cluster;
rr_node[index].switches[k] = arch->num_switches - 1; /* last switch in arch switch properties is a delayless switch */
assert(pb_graph_node->input_pins[i][j].output_edges[k]->num_output_pins == 1);
}
rr_node[index].type = INTRA_CLUSTER_EDGE;
if(is_primitive) {
rr_node[index].type = SINK;
}
}
}
for(i = 0; i < pb_graph_node->num_output_ports; i++) {
for(j = 0; j < pb_graph_node->num_output_pins[i]; j++) {
index = pb_graph_node->output_pins[i][j].pin_count_in_cluster;
rr_node[index].pb_graph_pin = &pb_graph_node->output_pins[i][j];
rr_node[index].fan_in = pb_graph_node->output_pins[i][j].num_input_edges;
rr_node[index].num_edges = pb_graph_node->output_pins[i][j].num_output_edges;
rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int));
rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int));
rr_node[index].net_num = OPEN;
rr_node[index].prev_node = OPEN;
rr_node[index].prev_edge = OPEN;
if(mode == 0) { /* Default mode is the first mode */
rr_node[index].capacity = 1;
} else {
rr_node[index].capacity = 0;
}
for(k = 0; k < pb_graph_node->output_pins[i][j].num_output_edges; k++) {
/* TODO: Intention was to do bus-based implementation here */
rr_node[index].edges[k] = pb_graph_node->output_pins[i][j].output_edges[k]->output_pins[0]->pin_count_in_cluster;
rr_node[index].switches[k] = arch->num_switches - 1;
assert(pb_graph_node->output_pins[i][j].output_edges[k]->num_output_pins == 1);
}
rr_node[index].type = INTRA_CLUSTER_EDGE;
if(is_primitive) {
rr_node[index].type = SOURCE;
}
}
}
for(i = 0; i < pb_graph_node->num_clock_ports; i++) {
for(j = 0; j < pb_graph_node->num_clock_pins[i]; j++) {
index = pb_graph_node->clock_pins[i][j].pin_count_in_cluster;
rr_node[index].pb_graph_pin = &pb_graph_node->clock_pins[i][j];
rr_node[index].fan_in = pb_graph_node->clock_pins[i][j].num_input_edges;
rr_node[index].num_edges = pb_graph_node->clock_pins[i][j].num_output_edges;
rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int));
rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int));
rr_node[index].net_num = OPEN;
rr_node[index].prev_node = OPEN;
rr_node[index].prev_edge = OPEN;
if(mode == 0) { /* default mode is the first mode (useful for routing */
rr_node[index].capacity = 1;
} else {
rr_node[index].capacity = 0;
}
for(k = 0; k < pb_graph_node->clock_pins[i][j].num_output_edges; k++) {
/* TODO: Intention was to do bus-based implementation here */
rr_node[index].edges[k] = pb_graph_node->clock_pins[i][j].output_edges[k]->output_pins[0]->pin_count_in_cluster;
rr_node[index].switches[k] = arch->num_switches - 1;
assert(pb_graph_node->clock_pins[i][j].output_edges[k]->num_output_pins == 1);
}
rr_node[index].type = INTRA_CLUSTER_EDGE;
if(is_primitive) {
rr_node[index].type = SINK;
}
}
}
for(i = 0; i < pb_graph_node->pb_type->num_modes; i++) {
for(j = 0; j < pb_graph_node->pb_type->modes[i].num_pb_type_children; j++) {
for(k = 0; k < pb_graph_node->pb_type->modes[i].pb_type_children[j].num_pb; k++) {
alloc_and_load_rr_graph_for_pb_graph_node(&pb_graph_node->child_pb_graph_nodes[i][j][k], arch, i);
}
}
}
}
Here is the call graph for this function:
Here is the caller graph for this function:| void free_cluster_legality_checker | ( | ) |
Definition at line 235 of file cluster_legality.c.
{
int inet;
free(rr_indexed_data);
free_rr_node_route_structs();
free_route_structs(NULL);
free_trace_structs();
free_chunk_memory(rr_mem_chunk_list_head);
rr_mem_chunk_list_head = NULL;
for(inet = 0; inet < num_logical_nets; inet++)
{
free(saved_net_rr_terminals[inet]);
}
free(net_rr_terminals);
free(saved_net_rr_terminals);
}
Here is the call graph for this function:
Here is the caller graph for this function:| void free_legalizer_for_cluster | ( | INP t_block * | clb | ) |
Definition at line 500 of file cluster_legality.c.
{
int i;
free_rr_node_route_structs();
for(i = 0; i < num_rr_nodes; i++) {
if(clb->pb->rr_graph[i].edges != NULL) {
free(clb->pb->rr_graph[i].edges);
}
if(clb->pb->rr_graph[i].switches != NULL) {
free(clb->pb->rr_graph[i].switches);
}
}
free(clb->pb->rr_graph);
}
Here is the call graph for this function:
Here is the caller graph for this function:| boolean is_pin_open | ( | int | i | ) |
Definition at line 1051 of file cluster_legality.c.
{
return (rr_node[i].occ == 0);
}
| void reset_legalizer_for_cluster | ( | t_block * | clb | ) |
Definition at line 515 of file cluster_legality.c.
{
int i;
for(i = 0; i < num_nets_in_cluster; i++) {
free_traceback(nets_in_cluster[i]);
trace_head[nets_in_cluster[i]] = best_routing[nets_in_cluster[i]];
free_traceback(nets_in_cluster[i]);
best_routing[nets_in_cluster[i]] = NULL;
}
free_rr_node_route_structs();
num_nets_in_cluster = 0;
}
Here is the call graph for this function:
Here is the caller graph for this function:| void save_cluster_solution | ( | ) |
This routine updates the occupancy and pres_cost of the rr_nodes that are affected by the portion of the routing of one net that starts at route_segment_start. If route_segment_start is trace_head[inet], the cost of all the nodes in the routing of net inet are updated. If add_or_sub is -1 the net (or net portion) is ripped up, if it is 1 the net is added to the routing. The size of pres_fac determines how severly oversubscribed rr_nodes are penalized.
Definition at line 1002 of file cluster_legality.c.
{
int i, j, net_index;
struct s_trace *tptr, *prev;
int inode;
for(i = 0; i < max_ext_index; i++) {
rr_node[i].net_num = OPEN;
rr_node[i].prev_edge = OPEN;
rr_node[i].prev_node = OPEN;
}
for(i = 0; i < num_nets_in_cluster; i++) {
prev = NULL;
net_index = nets_in_cluster[i];
tptr = trace_head[net_index];
if(tptr == NULL) /* No routing yet. */
return;
for(;;)
{
inode = tptr->index;
rr_node[inode].net_num = net_index;
if(prev != NULL) {
rr_node[inode].prev_node = prev->index;
for(j = 0; j < rr_node[prev->index].num_edges; j++) {
if(rr_node[prev->index].edges[j] == inode) {
rr_node[inode].prev_edge = j;
break;
}
}
assert(j != rr_node[prev->index].num_edges);
} else {
rr_node[inode].prev_node = OPEN;
rr_node[inode].prev_edge = OPEN;
}
if(rr_node[inode].type == SINK)
{
tptr = tptr->next; /* Skip next segment. */
if(tptr == NULL)
break;
}
prev = tptr;
tptr = tptr->next;
} /* End while loop -- did an entire traceback. */
}
}
Here is the caller graph for this function:| void set_pb_mode | ( | t_pb * | pb, |
| int | mode, | ||
| int | isOn | ||
| ) |
turns on mode for a pb by setting capacity of its rr_nodes to 1
Definition at line 1112 of file cluster_legality.c.
{
int i, j, index;
int i_pb_type, i_pb_inst;
const t_pb_type *pb_type;
t_pb_graph_node *pb_graph_node;
pb_type = pb->pb_graph_node->pb_type;
for(i_pb_type = 0; i_pb_type < pb_type->modes[mode].num_pb_type_children; i_pb_type++) {
for(i_pb_inst = 0; i_pb_inst < pb_type->modes[mode].pb_type_children[i_pb_type].num_pb; i_pb_inst++) {
pb_graph_node = &pb->pb_graph_node->child_pb_graph_nodes[mode][i_pb_type][i_pb_inst];
for(i = 0; i < pb_graph_node->num_input_ports; i++) {
for(j = 0; j < pb_graph_node->num_input_pins[i]; j++) {
index = pb_graph_node->input_pins[i][j].pin_count_in_cluster;
rr_node[index].capacity = isOn;
}
}
for(i = 0; i < pb_graph_node->num_output_ports; i++) {
for(j = 0; j < pb_graph_node->num_output_pins[i]; j++) {
index = pb_graph_node->output_pins[i][j].pin_count_in_cluster;
rr_node[index].capacity = isOn;
}
}
for(i = 0; i < pb_graph_node->num_clock_ports; i++) {
for(j = 0; j < pb_graph_node->num_clock_pins[i]; j++) {
index = pb_graph_node->clock_pins[i][j].pin_count_in_cluster;
rr_node[index].capacity = isOn;
}
}
}
}
}
Here is the caller graph for this function:| enum e_block_pack_status try_add_block_to_current_cluster_and_primitive | ( | INP int | iblock, |
| INP t_pb * | primitive | ||
| ) |
Allocates and loads the net_rr_terminals data structure. For each net it stores the rr_node index of the SOURCE of the net and all the SINKs of the net. [0..num_logical_nets-1][0..num_pins-1].
Definition at line 825 of file cluster_legality.c.
{
int ipin, iblk_net;
int orig_num_nets_in_cluster;
boolean success, found;
t_model_ports *port;
success = FALSE;
found = FALSE;
assert(primitive->pb_graph_node->pb_type->num_modes == 0); /* check if primitive */
assert(primitive->logical_block == OPEN && logical_block[iblock].clb_index == NO_CLUSTER); /* check if primitive and block is open */
/* check if block type matches primitive type */
if(logical_block[iblock].model != primitive->pb_graph_node->pb_type->model) {
/* End early, model is incompatible */
return BLK_FAILED_FEASIBLE;
}
orig_num_nets_in_cluster = num_nets_in_cluster;
save_and_reset_routing_cluster();
/* try pack it in */
assert(primitive->logical_block == OPEN);
assert(logical_block[iblock].clb_index == NO_CLUSTER);
primitive->logical_block = iblock;
logical_block[iblock].pb = primitive;
logical_block[iblock].clb_index = curr_cluster_index;
/* for each net of logical block, check if it is in cluster, if not add it */
/* also check if pins on primitive can fit logical block */
port = logical_block[iblock].model->inputs;
success = TRUE;
while(port && success) {
for(ipin = 0; ipin < port->size && success; ipin++) {
if(port->is_clock) {
assert(port->size == 1);
iblk_net = logical_block[iblock].clock_net;
} else {
iblk_net = logical_block[iblock].input_nets[port->index][ipin];
}
if(iblk_net == OPEN) {
continue;
}
if(!is_net_in_cluster(iblk_net)){
nets_in_cluster[num_nets_in_cluster] = iblk_net;
num_nets_in_cluster++;
}
success = add_net_rr_terminal_cluster(iblk_net, primitive, iblock, port, ipin);
}
port = port->next;
}
port = logical_block[iblock].model->outputs;
while(port && success) {
for(ipin = 0; ipin < port->size && success; ipin++) {
iblk_net = logical_block[iblock].output_nets[port->index][ipin];
if(iblk_net == OPEN) {
continue;
}
if(!is_net_in_cluster(iblk_net)){
nets_in_cluster[num_nets_in_cluster] = iblk_net;
num_nets_in_cluster++;
}
success = add_net_rr_terminal_cluster(iblk_net, primitive, iblock, port, ipin);
}
port = port->next;
}
if(success) {
success = reload_ext_net_rr_terminal_cluster();
}
if(success) {
/* route it */
reset_rr_node_route_structs(); /* Clear all prior rr_graph history */
success = try_breadth_first_route_cluster(); /* route from scratch */
}
if(success) {
return BLK_PASSED;
} else {
/* Cannot pack, restore cluster */
primitive->logical_block = OPEN;
logical_block[iblock].pb = NULL;
logical_block[iblock].clb_index = NO_CLUSTER;
restore_routing_cluster();
num_nets_in_cluster = orig_num_nets_in_cluster;
return BLK_FAILED_ROUTE;
}
}
Here is the call graph for this function:
Here is the caller graph for this function: