VPR-6.0

vpr/SRC/pack/cluster_legality.h File Reference

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

void alloc_and_load_cluster_legality_checker ()
void alloc_and_load_legalizer_for_cluster (INP t_block *clb, INP int clb_index, INP const t_arch *arch)
void free_legalizer_for_cluster (INP t_block *clb)
void free_cluster_legality_checker ()
void reset_legalizer_for_cluster (t_block *clb)
enum e_block_pack_status try_add_block_to_current_cluster_and_primitive (INP int logical_block, INP t_pb *primitive)
void save_cluster_solution ()
boolean is_pin_open (int i)
void set_pb_mode (t_pb *pb, int mode, int isOn)
void alloc_and_load_rr_graph_for_pb_graph_node (INP t_pb_graph_node *pb_graph_node, INP const t_arch *arch, int mode)

Function Documentation

void alloc_and_load_cluster_legality_checker ( )

Legalizes routing for a cluster

Definition at line 218 of file cluster_legality.c.

                                               {
        best_routing = (struct s_trace **)my_calloc(num_logical_nets, sizeof(struct s_trace *));
        nets_in_cluster = my_malloc(num_logical_nets * sizeof(int));
        num_nets_in_cluster = 0;
        num_nets = num_logical_nets;

        /* inside a cluster, I do not consider rr_indexed_data cost, set to 1 since other costs are multiplied by it */
        num_rr_indexed_data = 1;
        rr_indexed_data = my_calloc(1, sizeof(t_rr_indexed_data));
        rr_indexed_data[0].base_cost = 1;

        /* alloc routing structures */
        alloc_route_static_structs();
        alloc_net_rr_terminals_cluster();
}

Here is the call graph for this function:

Here is the caller graph for this function:

void alloc_and_load_legalizer_for_cluster ( INP t_block clb,
INP int  clb_index,
INP const t_arch arch 
)

Structure: Model external routing and internal routing

1. Model external routing num input pins == num external sources for input pins, fully connect them to input pins (simulates external routing) num output pins == num external sinks for output pins, fully connect them to output pins (simulates external routing) num clock pins == num external sources for clock pins, fully connect them to clock pins (simulates external routing) 2. Model internal routing

Definition at line 372 of file cluster_legality.c.

                                                                                                       {
    /* make each rr_node one correspond with pin and correspond with pin's index pin_count_in_cluster */
    int i, j, k, m, index, pb_graph_rr_index;
        int count_pins;
        t_pb_type * pb_type;
        t_pb_graph_node *pb_graph_node;
        int ipin;

        /* Create rr_graph */
        pb_type = clb->type->pb_type;
        pb_graph_node = clb->type->pb_graph_head;
        num_rr_nodes = pb_graph_node->total_pb_pins + pb_type->num_input_pins + 
                                        pb_type->num_output_pins + pb_type->num_clock_pins;
        if(num_rr_nodes > num_rr_intrinsic_cost) {
                free(rr_intrinsic_cost);
                rr_intrinsic_cost = my_calloc(num_rr_nodes, sizeof(float));
                num_rr_intrinsic_cost = num_rr_nodes;
        }
        rr_node = my_calloc(num_rr_nodes, sizeof(t_rr_node));
        clb->pb->rr_graph = rr_node;
        
        alloc_and_load_rr_graph_for_pb_graph_node(pb_graph_node, arch, 0);

        curr_cluster_index = clb_index;

        /*   Alloc and load rr_graph external sources and sinks */
        ext_input_rr_node_index = pb_graph_node->total_pb_pins;
        ext_output_rr_node_index = pb_type->num_input_pins + pb_graph_node->total_pb_pins;
        ext_clock_rr_node_index = pb_type->num_input_pins + pb_type->num_output_pins + pb_graph_node->total_pb_pins;
        max_ext_index = pb_type->num_input_pins + pb_type->num_output_pins + pb_type->num_clock_pins + pb_graph_node->total_pb_pins;
        
        for(i = 0; i < pb_type->num_input_pins; i++) {
                index = i + pb_graph_node->total_pb_pins;
                rr_node[index].type = SOURCE;
                rr_node[index].fan_in = 0;
                rr_node[index].num_edges = pb_type->num_input_pins;
                rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
                rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int));
                rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int));
                rr_node[index].capacity = 1;
                rr_intrinsic_cost[index] = 0;
        }

        for(i = 0; i < pb_type->num_output_pins; i++) {
                index = i + pb_type->num_input_pins + pb_graph_node->total_pb_pins;
                rr_node[index].type = SINK;
                rr_node[index].fan_in = pb_type->num_output_pins;
                rr_node[index].num_edges = 0;
                rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
                rr_node[index].capacity = 1;
                rr_intrinsic_cost[index] = 0;
        }

        for(i = 0; i < pb_type->num_clock_pins; i++) {
                index = i + pb_type->num_input_pins + pb_type->num_output_pins + pb_graph_node->total_pb_pins;
                rr_node[index].type = SOURCE;
                rr_node[index].fan_in = 0;
                rr_node[index].num_edges = pb_type->num_clock_pins;
                rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
                rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int));
                rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int));
                rr_node[index].capacity = 1;
                rr_intrinsic_cost[index] = 0;
        }

        ipin = 0;
        for(i = 0; i < pb_graph_node->num_input_ports; i++) {
                for(j = 0; j < pb_graph_node->num_input_pins[i]; j++) {
                        pb_graph_rr_index = pb_graph_node->input_pins[i][j].pin_count_in_cluster;
                        for(k = 0; k < pb_type->num_input_pins; k++) {
                                index = k + pb_graph_node->total_pb_pins;
                                rr_node[index].edges[ipin] = pb_graph_rr_index; 
                                rr_node[index].switches[ipin] = arch->num_switches - 1;
                        }
                        rr_node[pb_graph_rr_index].pack_intrinsic_cost = MAX_SHORT; /* using an input pin should be made costly */
                        ipin++;
                }
        }

        /* Must attach output pins to input pins because if a connection cannot fit using intra-cluster routing, it can also use external routing */
        for(i = 0; i < pb_graph_node->num_output_ports; i++) {
                for(j = 0; j < pb_graph_node->num_output_pins[i]; j++) {
                        count_pins = pb_graph_node->output_pins[i][j].num_output_edges + pb_type->num_output_pins + pb_type->num_input_pins;
                        pb_graph_rr_index = pb_graph_node->output_pins[i][j].pin_count_in_cluster;
                        rr_node[pb_graph_rr_index].edges = my_realloc(rr_node[pb_graph_rr_index].edges,
                                (count_pins) * sizeof(int));
                        rr_node[pb_graph_rr_index].switches = my_realloc(rr_node[pb_graph_rr_index].switches,
                                (count_pins) * sizeof(int));

                        ipin = 0;
                        for(k = 0; k < pb_graph_node->num_input_ports; k++) {
                                for(m = 0; m < pb_graph_node->num_input_pins[k]; m++) {
                                        index = pb_graph_node->input_pins[k][m].pin_count_in_cluster;
                                        rr_node[pb_graph_rr_index].edges[ipin + pb_graph_node->output_pins[i][j].num_output_edges] = index;
                                        rr_node[pb_graph_rr_index].switches[ipin + pb_graph_node->output_pins[i][j].num_output_edges] = arch->num_switches - 1;
                                        ipin++;
                                }
                        }
                        for(k = 0; k < pb_type->num_output_pins; k++) {
                                index = k + pb_type->num_input_pins + pb_graph_node->total_pb_pins;
                                rr_node[pb_graph_rr_index].edges[k + pb_type->num_input_pins + pb_graph_node->output_pins[i][j].num_output_edges] = index;
                                rr_node[pb_graph_rr_index].switches[k + pb_type->num_input_pins + pb_graph_node->output_pins[i][j].num_output_edges] = arch->num_switches - 1;
                        }
                        rr_node[pb_graph_rr_index].num_edges += pb_type->num_output_pins + pb_type->num_input_pins;
                        rr_node[pb_graph_rr_index].pack_intrinsic_cost = 1 + (float)rr_node[pb_graph_rr_index].num_edges / 5; /* need to normalize better than 5 */
                }
        }

        ipin = 0;
        for(i = 0; i < pb_graph_node->num_clock_ports; i++) {
                for(j = 0; j < pb_graph_node->num_clock_pins[i]; j++) {
                        for(k = 0; k < pb_type->num_clock_pins; k++) {
                                index = k + pb_type->num_input_pins + pb_type->num_output_pins + pb_graph_node->total_pb_pins;
                                pb_graph_rr_index = pb_graph_node->clock_pins[i][j].pin_count_in_cluster;
                                rr_node[index].edges[ipin] = pb_graph_rr_index; 
                                rr_node[index].switches[ipin] = arch->num_switches - 1;
                        }
                        ipin++;
                }
        }

        alloc_and_load_rr_node_route_structs();
        num_nets_in_cluster = 0;

}

Here is the call graph for this function:

Here is the caller graph for this function:

void alloc_and_load_rr_graph_for_pb_graph_node ( INP t_pb_graph_node pb_graph_node,
INP const t_arch arch,
int  mode 
)

Definition at line 254 of file cluster_legality.c.

                                                                                                                     {

        int i, j, k, index;
        boolean is_primitive;

        is_primitive = (pb_graph_node->pb_type->num_modes == 0);

        for(i = 0; i < pb_graph_node->num_input_ports; i++) {
                for(j = 0; j < pb_graph_node->num_input_pins[i]; j++) {
                        index = pb_graph_node->input_pins[i][j].pin_count_in_cluster;
                        rr_node[index].pb_graph_pin = &pb_graph_node->input_pins[i][j];
                        rr_node[index].fan_in = pb_graph_node->input_pins[i][j].num_input_edges;
                        rr_node[index].num_edges = pb_graph_node->input_pins[i][j].num_output_edges;
                        rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
                        rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int));
                        rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int));
                        rr_node[index].net_num = OPEN;
                        rr_node[index].prev_node = OPEN;
                        rr_node[index].prev_edge = OPEN;
                        if(mode == 0) { /* default mode is the first mode */
                                rr_node[index].capacity = 1;
                        } else {
                                rr_node[index].capacity = 0;
                        }
                        for(k = 0; k < pb_graph_node->input_pins[i][j].num_output_edges; k++) {
                                /* TODO: Intention was to do bus-based implementation here */
                                rr_node[index].edges[k] = pb_graph_node->input_pins[i][j].output_edges[k]->output_pins[0]->pin_count_in_cluster;
                                rr_node[index].switches[k] = arch->num_switches - 1; /* last switch in arch switch properties is a delayless switch */
                                assert(pb_graph_node->input_pins[i][j].output_edges[k]->num_output_pins == 1);
                        }
                        rr_node[index].type = INTRA_CLUSTER_EDGE;
                        if(is_primitive) {
                                rr_node[index].type = SINK;
                        }
                }
        }

        for(i = 0; i < pb_graph_node->num_output_ports; i++) {
                for(j = 0; j < pb_graph_node->num_output_pins[i]; j++) {
                        index = pb_graph_node->output_pins[i][j].pin_count_in_cluster;
                        rr_node[index].pb_graph_pin = &pb_graph_node->output_pins[i][j];
                        rr_node[index].fan_in = pb_graph_node->output_pins[i][j].num_input_edges;
                        rr_node[index].num_edges = pb_graph_node->output_pins[i][j].num_output_edges;
                        rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
                        rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int));
                        rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int));
                        rr_node[index].net_num = OPEN;
                        rr_node[index].prev_node = OPEN;
                        rr_node[index].prev_edge = OPEN;
                        if(mode == 0) { /* Default mode is the first mode */
                                rr_node[index].capacity = 1;
                        } else {
                                rr_node[index].capacity = 0;
                        }
                        for(k = 0; k < pb_graph_node->output_pins[i][j].num_output_edges; k++) {
                                /* TODO: Intention was to do bus-based implementation here */
                                rr_node[index].edges[k] = pb_graph_node->output_pins[i][j].output_edges[k]->output_pins[0]->pin_count_in_cluster;
                                rr_node[index].switches[k] = arch->num_switches - 1;
                                assert(pb_graph_node->output_pins[i][j].output_edges[k]->num_output_pins == 1);
                        }
                        rr_node[index].type = INTRA_CLUSTER_EDGE;
                        if(is_primitive) {
                                rr_node[index].type = SOURCE;
                        }
                }
        }

        for(i = 0; i < pb_graph_node->num_clock_ports; i++) {
                for(j = 0; j < pb_graph_node->num_clock_pins[i]; j++) {
                        index = pb_graph_node->clock_pins[i][j].pin_count_in_cluster;
                        rr_node[index].pb_graph_pin = &pb_graph_node->clock_pins[i][j];
                        rr_node[index].fan_in = pb_graph_node->clock_pins[i][j].num_input_edges;
                        rr_node[index].num_edges = pb_graph_node->clock_pins[i][j].num_output_edges;
                        rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
                        rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int));
                        rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int));
                        rr_node[index].net_num = OPEN;
                        rr_node[index].prev_node = OPEN;
                        rr_node[index].prev_edge = OPEN;
                        if(mode == 0) { /* default mode is the first mode (useful for routing */
                                rr_node[index].capacity = 1;
                        } else {
                                rr_node[index].capacity = 0;
                        }
                        for(k = 0; k < pb_graph_node->clock_pins[i][j].num_output_edges; k++) {
                                /* TODO: Intention was to do bus-based implementation here */
                                rr_node[index].edges[k] = pb_graph_node->clock_pins[i][j].output_edges[k]->output_pins[0]->pin_count_in_cluster;
                                rr_node[index].switches[k] = arch->num_switches - 1;
                                assert(pb_graph_node->clock_pins[i][j].output_edges[k]->num_output_pins == 1);
                        }
                        rr_node[index].type = INTRA_CLUSTER_EDGE;
                        if(is_primitive) {
                                rr_node[index].type = SINK;
                        }
                }
        }

        for(i = 0; i < pb_graph_node->pb_type->num_modes; i++) {
                for(j = 0; j < pb_graph_node->pb_type->modes[i].num_pb_type_children; j++) {
                        for(k = 0; k < pb_graph_node->pb_type->modes[i].pb_type_children[j].num_pb; k++) {
                                alloc_and_load_rr_graph_for_pb_graph_node(&pb_graph_node->child_pb_graph_nodes[i][j][k], arch, i);
                        }
                }
        }

}

Here is the call graph for this function:

Here is the caller graph for this function:

void free_cluster_legality_checker ( )

Definition at line 235 of file cluster_legality.c.

Here is the call graph for this function:

Here is the caller graph for this function:

void free_legalizer_for_cluster ( INP t_block clb)

Definition at line 500 of file cluster_legality.c.

                                                  {
        int i;

        free_rr_node_route_structs();
        for(i = 0; i < num_rr_nodes; i++) {
                if(clb->pb->rr_graph[i].edges != NULL) {
                        free(clb->pb->rr_graph[i].edges);
                }
                if(clb->pb->rr_graph[i].switches != NULL) {
                        free(clb->pb->rr_graph[i].switches);
                }
        }
        free(clb->pb->rr_graph);
}

Here is the call graph for this function:

Here is the caller graph for this function:

boolean is_pin_open ( int  i)

Definition at line 1051 of file cluster_legality.c.

                           {
        return (rr_node[i].occ == 0);
}
void reset_legalizer_for_cluster ( t_block clb)

Definition at line 515 of file cluster_legality.c.

                                               {
        int i;
        for(i = 0; i < num_nets_in_cluster; i++) {
                free_traceback(nets_in_cluster[i]);
                trace_head[nets_in_cluster[i]] = best_routing[nets_in_cluster[i]];
                free_traceback(nets_in_cluster[i]);             
                best_routing[nets_in_cluster[i]] = NULL;
        }

        free_rr_node_route_structs();
        num_nets_in_cluster = 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void save_cluster_solution ( )

This routine updates the occupancy and pres_cost of the rr_nodes that are affected by the portion of the routing of one net that starts at route_segment_start. If route_segment_start is trace_head[inet], the cost of all the nodes in the routing of net inet are updated. If add_or_sub is -1 the net (or net portion) is ripped up, if it is 1 the net is added to the routing. The size of pres_fac determines how severly oversubscribed rr_nodes are penalized.

Definition at line 1002 of file cluster_legality.c.

{
        int i, j, net_index;
    struct s_trace *tptr, *prev;
    int inode;
        for(i = 0; i < max_ext_index; i++) {
                rr_node[i].net_num = OPEN;
                rr_node[i].prev_edge = OPEN;
                rr_node[i].prev_node = OPEN;
        }
        for(i = 0; i < num_nets_in_cluster; i++) {
                prev = NULL;
                net_index = nets_in_cluster[i];
                tptr = trace_head[net_index];
                if(tptr == NULL)                /* No routing yet. */
                return;

                for(;;)
                {
                        inode = tptr->index;
                        rr_node[inode].net_num = net_index;
                        if(prev != NULL) {
                                rr_node[inode].prev_node = prev->index;
                                for(j = 0; j < rr_node[prev->index].num_edges; j++) {
                                        if(rr_node[prev->index].edges[j] == inode) {
                                                rr_node[inode].prev_edge = j;
                                                break;
                                        }
                                }
                                assert(j != rr_node[prev->index].num_edges);
                        } else {
                                rr_node[inode].prev_node = OPEN;
                                rr_node[inode].prev_edge = OPEN;
                        }

                        if(rr_node[inode].type == SINK)
                        {
                                tptr = tptr->next;      /* Skip next segment. */
                                if(tptr == NULL)
                                break;
                        }

                        prev = tptr;
                        tptr = tptr->next;

                }                       /* End while loop -- did an entire traceback. */
        }
}

Here is the caller graph for this function:

void set_pb_mode ( t_pb pb,
int  mode,
int  isOn 
)

turns on mode for a pb by setting capacity of its rr_nodes to 1

Definition at line 1112 of file cluster_legality.c.

                                               {
        int i, j, index;
        int i_pb_type, i_pb_inst;
        const t_pb_type *pb_type;
        t_pb_graph_node *pb_graph_node;

        pb_type = pb->pb_graph_node->pb_type;
        for(i_pb_type = 0; i_pb_type < pb_type->modes[mode].num_pb_type_children; i_pb_type++) {
                for(i_pb_inst = 0; i_pb_inst < pb_type->modes[mode].pb_type_children[i_pb_type].num_pb; i_pb_inst++) {
                        pb_graph_node = &pb->pb_graph_node->child_pb_graph_nodes[mode][i_pb_type][i_pb_inst];
                        for(i = 0; i < pb_graph_node->num_input_ports; i++) {
                                for(j = 0; j < pb_graph_node->num_input_pins[i]; j++) {
                                        index = pb_graph_node->input_pins[i][j].pin_count_in_cluster;
                                        rr_node[index].capacity = isOn;
                                }
                        }

                        for(i = 0; i < pb_graph_node->num_output_ports; i++) {
                                for(j = 0; j < pb_graph_node->num_output_pins[i]; j++) {
                                        index = pb_graph_node->output_pins[i][j].pin_count_in_cluster;
                                        rr_node[index].capacity = isOn;                 
                                }
                        }

                        for(i = 0; i < pb_graph_node->num_clock_ports; i++) {
                                for(j = 0; j < pb_graph_node->num_clock_pins[i]; j++) {
                                        index = pb_graph_node->clock_pins[i][j].pin_count_in_cluster;
                                        rr_node[index].capacity = isOn;
                                }
                        }       
                }
        }
}

Here is the caller graph for this function:

enum e_block_pack_status try_add_block_to_current_cluster_and_primitive ( INP int  iblock,
INP t_pb primitive 
)

Allocates and loads the net_rr_terminals data structure. For each net it stores the rr_node index of the SOURCE of the net and all the SINKs of the net. [0..num_logical_nets-1][0..num_pins-1].

Definition at line 825 of file cluster_legality.c.

{
    int ipin, iblk_net;
        int orig_num_nets_in_cluster;
        boolean success, found;
        t_model_ports *port;

        success = FALSE;
        found = FALSE;

        assert(primitive->pb_graph_node->pb_type->num_modes == 0); /* check if primitive */
        assert(primitive->logical_block == OPEN && logical_block[iblock].clb_index == NO_CLUSTER); /* check if primitive and block is open */

        /* check if block type matches primitive type */
        if(logical_block[iblock].model != primitive->pb_graph_node->pb_type->model) {
                /* End early, model is incompatible */
                return BLK_FAILED_FEASIBLE;
        }

        orig_num_nets_in_cluster = num_nets_in_cluster;
        save_and_reset_routing_cluster();

        /* try pack it in */
        assert(primitive->logical_block == OPEN);
        assert(logical_block[iblock].clb_index == NO_CLUSTER);

        primitive->logical_block = iblock;
        logical_block[iblock].pb = primitive;
        logical_block[iblock].clb_index = curr_cluster_index;
        
        /* for each net of logical block, check if it is in cluster, if not add it */
        /*   also check if pins on primitive can fit logical block */
        
        port = logical_block[iblock].model->inputs;
        success = TRUE;
        
        while(port && success) {
                for(ipin = 0; ipin < port->size && success; ipin++) {
                        if(port->is_clock) {
                                assert(port->size == 1);
                                iblk_net = logical_block[iblock].clock_net;
                        } else {
                                iblk_net = logical_block[iblock].input_nets[port->index][ipin];
                        }
                        if(iblk_net == OPEN) {
                                continue;
                        }
                        if(!is_net_in_cluster(iblk_net)){
                                nets_in_cluster[num_nets_in_cluster] = iblk_net;
                                num_nets_in_cluster++;
                        }
                        success = add_net_rr_terminal_cluster(iblk_net, primitive, iblock, port, ipin);
                }
                port = port->next;
        }

        port = logical_block[iblock].model->outputs;
        while(port && success) {
                for(ipin = 0; ipin < port->size && success; ipin++) {
                        iblk_net = logical_block[iblock].output_nets[port->index][ipin];
                        if(iblk_net == OPEN) {
                                continue;
                        }
                        if(!is_net_in_cluster(iblk_net)){
                                nets_in_cluster[num_nets_in_cluster] = iblk_net;
                                num_nets_in_cluster++;
                        }
                        success = add_net_rr_terminal_cluster(iblk_net, primitive, iblock, port, ipin);
                }
                port = port->next;
        }

        if(success) {
                success = reload_ext_net_rr_terminal_cluster();
        }

        if(success) {
                /* route it */
                reset_rr_node_route_structs(); /* Clear all prior rr_graph history */
                success = try_breadth_first_route_cluster(); /* route from scratch */
        }

        if(success) {
                return BLK_PASSED;
        } else {
                /* Cannot pack, restore cluster */
                primitive->logical_block = OPEN;
                logical_block[iblock].pb = NULL;
                logical_block[iblock].clb_index = NO_CLUSTER;
                restore_routing_cluster();
                num_nets_in_cluster = orig_num_nets_in_cluster;
                return BLK_FAILED_ROUTE;
        }
}

Here is the call graph for this function:

Here is the caller graph for this function: