VPR-6.0
|
Go to the source code of this file.
Functions | |
void | alloc_and_load_cluster_legality_checker () |
void | alloc_and_load_legalizer_for_cluster (INP t_block *clb, INP int clb_index, INP const t_arch *arch) |
void | free_legalizer_for_cluster (INP t_block *clb) |
void | free_cluster_legality_checker () |
void | reset_legalizer_for_cluster (t_block *clb) |
enum e_block_pack_status | try_add_block_to_current_cluster_and_primitive (INP int logical_block, INP t_pb *primitive) |
void | save_cluster_solution () |
boolean | is_pin_open (int i) |
void | set_pb_mode (t_pb *pb, int mode, int isOn) |
void | alloc_and_load_rr_graph_for_pb_graph_node (INP t_pb_graph_node *pb_graph_node, INP const t_arch *arch, int mode) |
void alloc_and_load_cluster_legality_checker | ( | ) |
Legalizes routing for a cluster
Definition at line 218 of file cluster_legality.c.
{ best_routing = (struct s_trace **)my_calloc(num_logical_nets, sizeof(struct s_trace *)); nets_in_cluster = my_malloc(num_logical_nets * sizeof(int)); num_nets_in_cluster = 0; num_nets = num_logical_nets; /* inside a cluster, I do not consider rr_indexed_data cost, set to 1 since other costs are multiplied by it */ num_rr_indexed_data = 1; rr_indexed_data = my_calloc(1, sizeof(t_rr_indexed_data)); rr_indexed_data[0].base_cost = 1; /* alloc routing structures */ alloc_route_static_structs(); alloc_net_rr_terminals_cluster(); }
void alloc_and_load_legalizer_for_cluster | ( | INP t_block * | clb, |
INP int | clb_index, | ||
INP const t_arch * | arch | ||
) |
Structure: Model external routing and internal routing
1. Model external routing num input pins == num external sources for input pins, fully connect them to input pins (simulates external routing) num output pins == num external sinks for output pins, fully connect them to output pins (simulates external routing) num clock pins == num external sources for clock pins, fully connect them to clock pins (simulates external routing) 2. Model internal routing
Definition at line 372 of file cluster_legality.c.
{ /* make each rr_node one correspond with pin and correspond with pin's index pin_count_in_cluster */ int i, j, k, m, index, pb_graph_rr_index; int count_pins; t_pb_type * pb_type; t_pb_graph_node *pb_graph_node; int ipin; /* Create rr_graph */ pb_type = clb->type->pb_type; pb_graph_node = clb->type->pb_graph_head; num_rr_nodes = pb_graph_node->total_pb_pins + pb_type->num_input_pins + pb_type->num_output_pins + pb_type->num_clock_pins; if(num_rr_nodes > num_rr_intrinsic_cost) { free(rr_intrinsic_cost); rr_intrinsic_cost = my_calloc(num_rr_nodes, sizeof(float)); num_rr_intrinsic_cost = num_rr_nodes; } rr_node = my_calloc(num_rr_nodes, sizeof(t_rr_node)); clb->pb->rr_graph = rr_node; alloc_and_load_rr_graph_for_pb_graph_node(pb_graph_node, arch, 0); curr_cluster_index = clb_index; /* Alloc and load rr_graph external sources and sinks */ ext_input_rr_node_index = pb_graph_node->total_pb_pins; ext_output_rr_node_index = pb_type->num_input_pins + pb_graph_node->total_pb_pins; ext_clock_rr_node_index = pb_type->num_input_pins + pb_type->num_output_pins + pb_graph_node->total_pb_pins; max_ext_index = pb_type->num_input_pins + pb_type->num_output_pins + pb_type->num_clock_pins + pb_graph_node->total_pb_pins; for(i = 0; i < pb_type->num_input_pins; i++) { index = i + pb_graph_node->total_pb_pins; rr_node[index].type = SOURCE; rr_node[index].fan_in = 0; rr_node[index].num_edges = pb_type->num_input_pins; rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */ rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int)); rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int)); rr_node[index].capacity = 1; rr_intrinsic_cost[index] = 0; } for(i = 0; i < pb_type->num_output_pins; i++) { index = i + pb_type->num_input_pins + pb_graph_node->total_pb_pins; rr_node[index].type = SINK; rr_node[index].fan_in = pb_type->num_output_pins; rr_node[index].num_edges = 0; rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */ rr_node[index].capacity = 1; rr_intrinsic_cost[index] = 0; } for(i = 0; i < pb_type->num_clock_pins; i++) { index = i + pb_type->num_input_pins + pb_type->num_output_pins + pb_graph_node->total_pb_pins; rr_node[index].type = SOURCE; rr_node[index].fan_in = 0; rr_node[index].num_edges = pb_type->num_clock_pins; rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */ rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int)); rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int)); rr_node[index].capacity = 1; rr_intrinsic_cost[index] = 0; } ipin = 0; for(i = 0; i < pb_graph_node->num_input_ports; i++) { for(j = 0; j < pb_graph_node->num_input_pins[i]; j++) { pb_graph_rr_index = pb_graph_node->input_pins[i][j].pin_count_in_cluster; for(k = 0; k < pb_type->num_input_pins; k++) { index = k + pb_graph_node->total_pb_pins; rr_node[index].edges[ipin] = pb_graph_rr_index; rr_node[index].switches[ipin] = arch->num_switches - 1; } rr_node[pb_graph_rr_index].pack_intrinsic_cost = MAX_SHORT; /* using an input pin should be made costly */ ipin++; } } /* Must attach output pins to input pins because if a connection cannot fit using intra-cluster routing, it can also use external routing */ for(i = 0; i < pb_graph_node->num_output_ports; i++) { for(j = 0; j < pb_graph_node->num_output_pins[i]; j++) { count_pins = pb_graph_node->output_pins[i][j].num_output_edges + pb_type->num_output_pins + pb_type->num_input_pins; pb_graph_rr_index = pb_graph_node->output_pins[i][j].pin_count_in_cluster; rr_node[pb_graph_rr_index].edges = my_realloc(rr_node[pb_graph_rr_index].edges, (count_pins) * sizeof(int)); rr_node[pb_graph_rr_index].switches = my_realloc(rr_node[pb_graph_rr_index].switches, (count_pins) * sizeof(int)); ipin = 0; for(k = 0; k < pb_graph_node->num_input_ports; k++) { for(m = 0; m < pb_graph_node->num_input_pins[k]; m++) { index = pb_graph_node->input_pins[k][m].pin_count_in_cluster; rr_node[pb_graph_rr_index].edges[ipin + pb_graph_node->output_pins[i][j].num_output_edges] = index; rr_node[pb_graph_rr_index].switches[ipin + pb_graph_node->output_pins[i][j].num_output_edges] = arch->num_switches - 1; ipin++; } } for(k = 0; k < pb_type->num_output_pins; k++) { index = k + pb_type->num_input_pins + pb_graph_node->total_pb_pins; rr_node[pb_graph_rr_index].edges[k + pb_type->num_input_pins + pb_graph_node->output_pins[i][j].num_output_edges] = index; rr_node[pb_graph_rr_index].switches[k + pb_type->num_input_pins + pb_graph_node->output_pins[i][j].num_output_edges] = arch->num_switches - 1; } rr_node[pb_graph_rr_index].num_edges += pb_type->num_output_pins + pb_type->num_input_pins; rr_node[pb_graph_rr_index].pack_intrinsic_cost = 1 + (float)rr_node[pb_graph_rr_index].num_edges / 5; /* need to normalize better than 5 */ } } ipin = 0; for(i = 0; i < pb_graph_node->num_clock_ports; i++) { for(j = 0; j < pb_graph_node->num_clock_pins[i]; j++) { for(k = 0; k < pb_type->num_clock_pins; k++) { index = k + pb_type->num_input_pins + pb_type->num_output_pins + pb_graph_node->total_pb_pins; pb_graph_rr_index = pb_graph_node->clock_pins[i][j].pin_count_in_cluster; rr_node[index].edges[ipin] = pb_graph_rr_index; rr_node[index].switches[ipin] = arch->num_switches - 1; } ipin++; } } alloc_and_load_rr_node_route_structs(); num_nets_in_cluster = 0; }
void alloc_and_load_rr_graph_for_pb_graph_node | ( | INP t_pb_graph_node * | pb_graph_node, |
INP const t_arch * | arch, | ||
int | mode | ||
) |
Definition at line 254 of file cluster_legality.c.
{ int i, j, k, index; boolean is_primitive; is_primitive = (pb_graph_node->pb_type->num_modes == 0); for(i = 0; i < pb_graph_node->num_input_ports; i++) { for(j = 0; j < pb_graph_node->num_input_pins[i]; j++) { index = pb_graph_node->input_pins[i][j].pin_count_in_cluster; rr_node[index].pb_graph_pin = &pb_graph_node->input_pins[i][j]; rr_node[index].fan_in = pb_graph_node->input_pins[i][j].num_input_edges; rr_node[index].num_edges = pb_graph_node->input_pins[i][j].num_output_edges; rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */ rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int)); rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int)); rr_node[index].net_num = OPEN; rr_node[index].prev_node = OPEN; rr_node[index].prev_edge = OPEN; if(mode == 0) { /* default mode is the first mode */ rr_node[index].capacity = 1; } else { rr_node[index].capacity = 0; } for(k = 0; k < pb_graph_node->input_pins[i][j].num_output_edges; k++) { /* TODO: Intention was to do bus-based implementation here */ rr_node[index].edges[k] = pb_graph_node->input_pins[i][j].output_edges[k]->output_pins[0]->pin_count_in_cluster; rr_node[index].switches[k] = arch->num_switches - 1; /* last switch in arch switch properties is a delayless switch */ assert(pb_graph_node->input_pins[i][j].output_edges[k]->num_output_pins == 1); } rr_node[index].type = INTRA_CLUSTER_EDGE; if(is_primitive) { rr_node[index].type = SINK; } } } for(i = 0; i < pb_graph_node->num_output_ports; i++) { for(j = 0; j < pb_graph_node->num_output_pins[i]; j++) { index = pb_graph_node->output_pins[i][j].pin_count_in_cluster; rr_node[index].pb_graph_pin = &pb_graph_node->output_pins[i][j]; rr_node[index].fan_in = pb_graph_node->output_pins[i][j].num_input_edges; rr_node[index].num_edges = pb_graph_node->output_pins[i][j].num_output_edges; rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */ rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int)); rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int)); rr_node[index].net_num = OPEN; rr_node[index].prev_node = OPEN; rr_node[index].prev_edge = OPEN; if(mode == 0) { /* Default mode is the first mode */ rr_node[index].capacity = 1; } else { rr_node[index].capacity = 0; } for(k = 0; k < pb_graph_node->output_pins[i][j].num_output_edges; k++) { /* TODO: Intention was to do bus-based implementation here */ rr_node[index].edges[k] = pb_graph_node->output_pins[i][j].output_edges[k]->output_pins[0]->pin_count_in_cluster; rr_node[index].switches[k] = arch->num_switches - 1; assert(pb_graph_node->output_pins[i][j].output_edges[k]->num_output_pins == 1); } rr_node[index].type = INTRA_CLUSTER_EDGE; if(is_primitive) { rr_node[index].type = SOURCE; } } } for(i = 0; i < pb_graph_node->num_clock_ports; i++) { for(j = 0; j < pb_graph_node->num_clock_pins[i]; j++) { index = pb_graph_node->clock_pins[i][j].pin_count_in_cluster; rr_node[index].pb_graph_pin = &pb_graph_node->clock_pins[i][j]; rr_node[index].fan_in = pb_graph_node->clock_pins[i][j].num_input_edges; rr_node[index].num_edges = pb_graph_node->clock_pins[i][j].num_output_edges; rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */ rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int)); rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int)); rr_node[index].net_num = OPEN; rr_node[index].prev_node = OPEN; rr_node[index].prev_edge = OPEN; if(mode == 0) { /* default mode is the first mode (useful for routing */ rr_node[index].capacity = 1; } else { rr_node[index].capacity = 0; } for(k = 0; k < pb_graph_node->clock_pins[i][j].num_output_edges; k++) { /* TODO: Intention was to do bus-based implementation here */ rr_node[index].edges[k] = pb_graph_node->clock_pins[i][j].output_edges[k]->output_pins[0]->pin_count_in_cluster; rr_node[index].switches[k] = arch->num_switches - 1; assert(pb_graph_node->clock_pins[i][j].output_edges[k]->num_output_pins == 1); } rr_node[index].type = INTRA_CLUSTER_EDGE; if(is_primitive) { rr_node[index].type = SINK; } } } for(i = 0; i < pb_graph_node->pb_type->num_modes; i++) { for(j = 0; j < pb_graph_node->pb_type->modes[i].num_pb_type_children; j++) { for(k = 0; k < pb_graph_node->pb_type->modes[i].pb_type_children[j].num_pb; k++) { alloc_and_load_rr_graph_for_pb_graph_node(&pb_graph_node->child_pb_graph_nodes[i][j][k], arch, i); } } } }
void free_cluster_legality_checker | ( | ) |
Definition at line 235 of file cluster_legality.c.
{ int inet; free(rr_indexed_data); free_rr_node_route_structs(); free_route_structs(NULL); free_trace_structs(); free_chunk_memory(rr_mem_chunk_list_head); rr_mem_chunk_list_head = NULL; for(inet = 0; inet < num_logical_nets; inet++) { free(saved_net_rr_terminals[inet]); } free(net_rr_terminals); free(saved_net_rr_terminals); }
void free_legalizer_for_cluster | ( | INP t_block * | clb | ) |
Definition at line 500 of file cluster_legality.c.
{ int i; free_rr_node_route_structs(); for(i = 0; i < num_rr_nodes; i++) { if(clb->pb->rr_graph[i].edges != NULL) { free(clb->pb->rr_graph[i].edges); } if(clb->pb->rr_graph[i].switches != NULL) { free(clb->pb->rr_graph[i].switches); } } free(clb->pb->rr_graph); }
boolean is_pin_open | ( | int | i | ) |
Definition at line 1051 of file cluster_legality.c.
{ return (rr_node[i].occ == 0); }
void reset_legalizer_for_cluster | ( | t_block * | clb | ) |
Definition at line 515 of file cluster_legality.c.
{ int i; for(i = 0; i < num_nets_in_cluster; i++) { free_traceback(nets_in_cluster[i]); trace_head[nets_in_cluster[i]] = best_routing[nets_in_cluster[i]]; free_traceback(nets_in_cluster[i]); best_routing[nets_in_cluster[i]] = NULL; } free_rr_node_route_structs(); num_nets_in_cluster = 0; }
void save_cluster_solution | ( | ) |
This routine updates the occupancy and pres_cost of the rr_nodes that are affected by the portion of the routing of one net that starts at route_segment_start. If route_segment_start is trace_head[inet], the cost of all the nodes in the routing of net inet are updated. If add_or_sub is -1 the net (or net portion) is ripped up, if it is 1 the net is added to the routing. The size of pres_fac determines how severly oversubscribed rr_nodes are penalized.
Definition at line 1002 of file cluster_legality.c.
{ int i, j, net_index; struct s_trace *tptr, *prev; int inode; for(i = 0; i < max_ext_index; i++) { rr_node[i].net_num = OPEN; rr_node[i].prev_edge = OPEN; rr_node[i].prev_node = OPEN; } for(i = 0; i < num_nets_in_cluster; i++) { prev = NULL; net_index = nets_in_cluster[i]; tptr = trace_head[net_index]; if(tptr == NULL) /* No routing yet. */ return; for(;;) { inode = tptr->index; rr_node[inode].net_num = net_index; if(prev != NULL) { rr_node[inode].prev_node = prev->index; for(j = 0; j < rr_node[prev->index].num_edges; j++) { if(rr_node[prev->index].edges[j] == inode) { rr_node[inode].prev_edge = j; break; } } assert(j != rr_node[prev->index].num_edges); } else { rr_node[inode].prev_node = OPEN; rr_node[inode].prev_edge = OPEN; } if(rr_node[inode].type == SINK) { tptr = tptr->next; /* Skip next segment. */ if(tptr == NULL) break; } prev = tptr; tptr = tptr->next; } /* End while loop -- did an entire traceback. */ } }
void set_pb_mode | ( | t_pb * | pb, |
int | mode, | ||
int | isOn | ||
) |
turns on mode for a pb by setting capacity of its rr_nodes to 1
Definition at line 1112 of file cluster_legality.c.
{ int i, j, index; int i_pb_type, i_pb_inst; const t_pb_type *pb_type; t_pb_graph_node *pb_graph_node; pb_type = pb->pb_graph_node->pb_type; for(i_pb_type = 0; i_pb_type < pb_type->modes[mode].num_pb_type_children; i_pb_type++) { for(i_pb_inst = 0; i_pb_inst < pb_type->modes[mode].pb_type_children[i_pb_type].num_pb; i_pb_inst++) { pb_graph_node = &pb->pb_graph_node->child_pb_graph_nodes[mode][i_pb_type][i_pb_inst]; for(i = 0; i < pb_graph_node->num_input_ports; i++) { for(j = 0; j < pb_graph_node->num_input_pins[i]; j++) { index = pb_graph_node->input_pins[i][j].pin_count_in_cluster; rr_node[index].capacity = isOn; } } for(i = 0; i < pb_graph_node->num_output_ports; i++) { for(j = 0; j < pb_graph_node->num_output_pins[i]; j++) { index = pb_graph_node->output_pins[i][j].pin_count_in_cluster; rr_node[index].capacity = isOn; } } for(i = 0; i < pb_graph_node->num_clock_ports; i++) { for(j = 0; j < pb_graph_node->num_clock_pins[i]; j++) { index = pb_graph_node->clock_pins[i][j].pin_count_in_cluster; rr_node[index].capacity = isOn; } } } } }
enum e_block_pack_status try_add_block_to_current_cluster_and_primitive | ( | INP int | iblock, |
INP t_pb * | primitive | ||
) |
Allocates and loads the net_rr_terminals data structure. For each net it stores the rr_node index of the SOURCE of the net and all the SINKs of the net. [0..num_logical_nets-1][0..num_pins-1].
Definition at line 825 of file cluster_legality.c.
{ int ipin, iblk_net; int orig_num_nets_in_cluster; boolean success, found; t_model_ports *port; success = FALSE; found = FALSE; assert(primitive->pb_graph_node->pb_type->num_modes == 0); /* check if primitive */ assert(primitive->logical_block == OPEN && logical_block[iblock].clb_index == NO_CLUSTER); /* check if primitive and block is open */ /* check if block type matches primitive type */ if(logical_block[iblock].model != primitive->pb_graph_node->pb_type->model) { /* End early, model is incompatible */ return BLK_FAILED_FEASIBLE; } orig_num_nets_in_cluster = num_nets_in_cluster; save_and_reset_routing_cluster(); /* try pack it in */ assert(primitive->logical_block == OPEN); assert(logical_block[iblock].clb_index == NO_CLUSTER); primitive->logical_block = iblock; logical_block[iblock].pb = primitive; logical_block[iblock].clb_index = curr_cluster_index; /* for each net of logical block, check if it is in cluster, if not add it */ /* also check if pins on primitive can fit logical block */ port = logical_block[iblock].model->inputs; success = TRUE; while(port && success) { for(ipin = 0; ipin < port->size && success; ipin++) { if(port->is_clock) { assert(port->size == 1); iblk_net = logical_block[iblock].clock_net; } else { iblk_net = logical_block[iblock].input_nets[port->index][ipin]; } if(iblk_net == OPEN) { continue; } if(!is_net_in_cluster(iblk_net)){ nets_in_cluster[num_nets_in_cluster] = iblk_net; num_nets_in_cluster++; } success = add_net_rr_terminal_cluster(iblk_net, primitive, iblock, port, ipin); } port = port->next; } port = logical_block[iblock].model->outputs; while(port && success) { for(ipin = 0; ipin < port->size && success; ipin++) { iblk_net = logical_block[iblock].output_nets[port->index][ipin]; if(iblk_net == OPEN) { continue; } if(!is_net_in_cluster(iblk_net)){ nets_in_cluster[num_nets_in_cluster] = iblk_net; num_nets_in_cluster++; } success = add_net_rr_terminal_cluster(iblk_net, primitive, iblock, port, ipin); } port = port->next; } if(success) { success = reload_ext_net_rr_terminal_cluster(); } if(success) { /* route it */ reset_rr_node_route_structs(); /* Clear all prior rr_graph history */ success = try_breadth_first_route_cluster(); /* route from scratch */ } if(success) { return BLK_PASSED; } else { /* Cannot pack, restore cluster */ primitive->logical_block = OPEN; logical_block[iblock].pb = NULL; logical_block[iblock].clb_index = NO_CLUSTER; restore_routing_cluster(); num_nets_in_cluster = orig_num_nets_in_cluster; return BLK_FAILED_ROUTE; } }