Skip to content

Commit

Permalink
address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
duck2 committed Nov 14, 2024
1 parent a40c0df commit b382e74
Show file tree
Hide file tree
Showing 11 changed files with 71 additions and 49 deletions.
14 changes: 10 additions & 4 deletions vpr/src/route/DecompNetlistRouter.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

/** @file Parallel and net-decomposing case for NetlistRouter. Works like
* \see ParallelNetlistRouter, but tries to "decompose" nets and assign them to
* the next level of the partition tree where possible. */
* the next level of the partition tree where possible.
* See "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */
#include "netlist_routers.h"

#include <tbb/task_group.h>
Expand Down Expand Up @@ -57,6 +58,7 @@ class DecompNetlistRouter : public NetlistRouter {
* \ref route_net for each net, which will handle other global updates.
* \return RouteIterResults for this iteration. */
RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
/** Inform the PartitionTree of the nets with updated bounding boxes */
void handle_bb_updated_nets(const std::vector<ParentNetId>& nets);
/** Set RCV enable flag for all routers managed by this netlist router.
* Net decomposition does not work with RCV, so calling this fn with x=true is a fatal error. */
Expand All @@ -66,9 +68,13 @@ class DecompNetlistRouter : public NetlistRouter {
private:
/** Should we decompose this net? */
bool should_decompose_net(ParentNetId net_id, const PartitionTreeNode& node);
/** Get a bitset with sinks to route before net decomposition */
/** Get a bitset of sinks to route before net decomposition. Output bitset is
* [1..num_sinks] where the corresponding index is set to 1 if the sink needs to
* be routed */
vtr::dynamic_bitset<> get_decomposition_mask(ParentNetId net_id, const PartitionTreeNode& node);
/** Get a bitset with sinks to route before virtual net decomposition */
/** Get a bitset of sinks to route before virtual net decomposition. Output bitset is
* [1..num_sinks] where the corresponding index is set to 1 if the sink needs to
* be routed */
vtr::dynamic_bitset<> get_decomposition_mask_vnet(const VirtualNet& vnet, const PartitionTreeNode& node);
/** Decompose and route a regular net. Output the resulting vnets to \p left and \p right.
* \return Success status: true if routing is successful and left and right now contain valid virtual nets: false otherwise. */
Expand Down Expand Up @@ -116,7 +122,7 @@ class DecompNetlistRouter : public NetlistRouter {
float _pres_fac;
float _worst_neg_slack;

/** The partition tree */
/** The partition tree. Holds the groups of nets for each partition */
vtr::optional<PartitionTree> _tree;

/** Sinks to be always sampled for decomposition for each net: [0.._net_list.size()-1]
Expand Down
24 changes: 13 additions & 11 deletions vpr/src/route/DecompNetlistRouter.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,25 @@ inline RouteIterResults DecompNetlistRouter<HeapType>::route_netlist(int itry, f
_pres_fac = pres_fac;
_worst_neg_slack = worst_neg_slack;

vtr::Timer t;
vtr::Timer timer;

/* Organize netlist into a PartitionTree.
* Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */
if(!_tree){
_tree = PartitionTree(_net_list);
PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(t.elapsed_sec()) + " s");
PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s");
}

/* Remove all virtual nets: we will create them for each iteration */
/* Remove all virtual nets: we will create them for each iteration.
* This needs to be done because the partition tree can change between iterations
* due to bounding box updates, which invalidates virtual nets */
_tree->clear_vnets();

/* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. */
tbb::task_group g;
route_partition_tree_node(g, _tree->root());
g.wait();
PartitionTreeDebug::log("Routing all nets took " + std::to_string(t.elapsed_sec()) + " s");
tbb::task_group group;
route_partition_tree_node(group, _tree->root());
group.wait();
PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s");

/* Combine results from threads */
RouteIterResults out;
Expand All @@ -52,7 +54,6 @@ inline RouteIterResults DecompNetlistRouter<HeapType>::route_netlist(int itry, f
return out;
}

/* TODO: Handle this in route_netlist */
template<typename HeapType>
void DecompNetlistRouter<HeapType>::handle_bb_updated_nets(const std::vector<ParentNetId>& nets) {
VTR_ASSERT(_tree);
Expand Down Expand Up @@ -139,8 +140,9 @@ inline bool should_decompose_vnet(const VirtualNet& vnet, const PartitionTreeNod
template<typename HeapType>
void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) {
auto& route_ctx = g_vpr_ctx.mutable_routing();
vtr::Timer t;
vtr::Timer timer;

/* node.nets is an unordered set, copy into vector to sort */
std::vector<ParentNetId> nets(node.nets.begin(), node.nets.end());

/* Sort so that nets with the most sinks are routed first.
Expand Down Expand Up @@ -256,7 +258,7 @@ void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g

PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size())
+ " nets and " + std::to_string(node.vnets.size())
+ " virtual nets routed in " + std::to_string(t.elapsed_sec())
+ " virtual nets routed in " + std::to_string(timer.elapsed_sec())
+ " s");

/* This node is finished: add left & right branches to the task queue */
Expand Down Expand Up @@ -674,7 +676,7 @@ vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_decomposition_mask_vnet
* sinks in the small side and unblock. Add convex hull since we are in a vnet which
* may not have a source at all */
if (inside_bb(tree.root().inode, vnet.clipped_bb)) { /* We have source, no need to sample after reduction in most cases */
bool is_reduced = get_reduction_mask_vnet_with_source(vnet, node.cutline_axis, node.cutline_pos, out);
bool is_reduced = get_reduction_mask_vnet_with_source(vnet, node.cutline_axis, node.cutline_pos, out);
bool source_on_cutline = is_close_to_cutline(tree.root().inode, node.cutline_axis, node.cutline_pos, 1);
if (!is_reduced || source_on_cutline){
convex_hull_downsample(vnet.net_id, vnet.clipped_bb, out);
Expand Down
5 changes: 3 additions & 2 deletions vpr/src/route/ParallelNetlistRouter.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
*
* Note that the parallel router does not support graphical router breakpoints.
*
* [0]: F. Koşar, "A net-decomposing parallel FPGA router", MS thesis, UofT ECE, 2023 */
* [0]: "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */
#include "netlist_routers.h"
#include "vtr_optional.h"

Expand Down Expand Up @@ -53,6 +53,7 @@ class ParallelNetlistRouter : public NetlistRouter {
* \ref route_net for each net, which will handle other global updates.
* \return RouteIterResults for this iteration. */
RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
/** Inform the PartitionTree of the nets with updated bounding boxes */
void handle_bb_updated_nets(const std::vector<ParentNetId>& nets);
void set_rcv_enabled(bool x);
void set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info);
Expand Down Expand Up @@ -98,7 +99,7 @@ class ParallelNetlistRouter : public NetlistRouter {
float _pres_fac;
float _worst_neg_slack;

/** The partition tree */
/** The partition tree. Holds the groups of nets for each partition */
vtr::optional<PartitionTree> _tree;
};

Expand Down
18 changes: 9 additions & 9 deletions vpr/src/route/ParallelNetlistRouter.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,17 @@ inline RouteIterResults ParallelNetlistRouter<HeapType>::route_netlist(int itry,

/* Organize netlist into a PartitionTree.
* Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */
vtr::Timer t;
vtr::Timer timer;
if(!_tree){
_tree = PartitionTree(_net_list);
PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(t.elapsed_sec()) + " s");
PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s");
}

/* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. */
tbb::task_group g;
route_partition_tree_node(g, _tree->root());
g.wait();
PartitionTreeDebug::log("Routing all nets took " + std::to_string(t.elapsed_sec()) + " s");
tbb::task_group group;
route_partition_tree_node(group, _tree->root());
group.wait();
PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s");

/* Combine results from threads */
RouteIterResults out;
Expand All @@ -48,14 +48,15 @@ template<typename HeapType>
void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) {
auto& route_ctx = g_vpr_ctx.mutable_routing();

/* node.nets is an unordered set, copy into vector to sort */
std::vector<ParentNetId> nets(node.nets.begin(), node.nets.end());

/* Sort so net with most sinks is routed first. */
std::stable_sort(nets.begin(), nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool {
return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size();
});

vtr::Timer t;
vtr::Timer timer;
for (auto net_id : nets) {
auto flags = route_net(
_routers_th.local(),
Expand Down Expand Up @@ -95,7 +96,7 @@ void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group&

PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size())
+ " nets and " + std::to_string(node.vnets.size())
+ " virtual nets routed in " + std::to_string(t.elapsed_sec())
+ " virtual nets routed in " + std::to_string(timer.elapsed_sec())
+ " s");

/* This node is finished: add left & right branches to the task queue */
Expand All @@ -111,7 +112,6 @@ void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group&
}
}

/* TODO: Handle this in route_netlist */
template<typename HeapType>
void ParallelNetlistRouter<HeapType>::handle_bb_updated_nets(const std::vector<ParentNetId>& nets) {
VTR_ASSERT(_tree);
Expand Down
9 changes: 5 additions & 4 deletions vpr/src/route/SerialNetlistRouter.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, f
auto& route_ctx = g_vpr_ctx.mutable_routing();
RouteIterResults out;

vtr::Timer t;
vtr::Timer timer;

/* Sort so net with most sinks is routed first */
auto sorted_nets = std::vector<ParentNetId>(_net_list.nets().begin(), _net_list.nets().end());
Expand Down Expand Up @@ -48,7 +48,9 @@ inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, f
}

if (flags.retry_with_full_bb) {
/* Grow the BB and retry this net right away. We don't populate out.bb_updated_nets */
/* Grow the BB and retry this net right away.
* We don't populate out.bb_updated_nets for the serial router, since
* there is no partition tree to update. */
route_ctx.route_bb[net_id] = full_device_bb();
inet--;
continue;
Expand All @@ -62,11 +64,10 @@ inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, f
}
}

PartitionTreeDebug::log("Routing all nets took " + std::to_string(t.elapsed_sec()) + " s");
PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s");
return out;
}

/* TODO: Handle this in route_netlist */
/** Intentionally a no-op: the serial router keeps no partition tree, so there is
 * nothing to update when net bounding boxes change. The parameter is left unnamed
 * because it is unused. */
template<typename HeapType>
void SerialNetlistRouter<HeapType>::handle_bb_updated_nets(const std::vector<ParentNetId>& /* nets */) {
}
Expand Down
12 changes: 5 additions & 7 deletions vpr/src/route/connection_router.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include "rr_graph.h"
#include "rr_graph_fwd.h"

/** Used for the flat router. The node isn't relevant to the target if
* it is an intra-block node outside of our target block */
static bool relevant_node_to_target(const RRGraphView* rr_graph,
RRNodeId node_to_add,
RRNodeId target_node);
Expand Down Expand Up @@ -997,12 +999,7 @@ t_bb ConnectionRouter<Heap>::add_high_fanout_route_tree_to_heap(
continue;
RRNodeId rr_node_to_add = rt_node.inode;

bool is_inside_bb = inside_bb(rr_node_to_add, net_bounding_box);

if(!is_inside_bb)
continue;

/* TODO: Why are we doing this? */
/* Flat router: don't go into clusters other than the target one */
if (is_flat_) {
if (!relevant_node_to_target(rr_graph_, rr_node_to_add, target_node))
continue;
Expand Down Expand Up @@ -1041,7 +1038,8 @@ t_bb ConnectionRouter<Heap>::add_high_fanout_route_tree_to_heap(
}
if (done) break;
}
//If the target bin and its surrounding bins were empty, just add the full route tree
/* If we didn't find enough nodes to branch off near the target
* or they are on the wrong grid layer, just add the full route tree */
if (chan_nodes_added <= SINGLE_BIN_MIN_NODES || !found_node_on_same_layer) {
add_route_tree_to_heap(rt_root, target_node, cost_params, net_bounding_box);
return net_bounding_box;
Expand Down
6 changes: 4 additions & 2 deletions vpr/src/route/netlist_routers.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ struct RouteIterResults {
bool is_routable = true;
/** Net IDs with changed routing */
std::vector<ParentNetId> rerouted_nets;
/** Net IDs with changed bounding box */
/** Net IDs with changed bounding box for this iteration.
* Used by the parallel router to update the \ref PartitionTree */
std::vector<ParentNetId> bb_updated_nets;
/** RouterStats for this iteration */
RouterStats stats;
Expand All @@ -56,7 +57,8 @@ class NetlistRouter {
* \return RouteIterResults for this iteration. */
virtual RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack) = 0;

/** Handle net bounding box updates. No-op for the serial router */
/** Handle net bounding box updates by passing them to the PartitionTree.
* No-op for the serial router */
virtual void handle_bb_updated_nets(const std::vector<ParentNetId>& nets) = 0;

/** Enable RCV for each of the ConnectionRouters this NetlistRouter manages.*/
Expand Down
13 changes: 10 additions & 3 deletions vpr/src/route/partition_tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,23 @@ PartitionTree::PartitionTree(const Netlist<>& netlist) {
_root = build_helper(netlist, all_nets, 0, 0, device_ctx.grid.width() - 1, device_ctx.grid.height() - 1);
}

/** Build a branch of the PartitionTree given a set of \p nets and a bounding box.
* Calls itself recursively with smaller and smaller bounding boxes until there are fewer
* nets than \ref MIN_NETS_TO_PARTITION. */
std::unique_ptr<PartitionTreeNode> PartitionTree::build_helper(const Netlist<>& netlist, const std::unordered_set<ParentNetId>& nets, int x1, int y1, int x2, int y2) {
if (nets.empty())
return nullptr;

const auto& route_ctx = g_vpr_ctx.routing();

/* Only build this for 2 dimensions. Ignore the layers for now */
const auto& device_ctx = g_vpr_ctx.device();
int layer_max = device_ctx.grid.get_num_layers() - 1;

auto out = std::make_unique<PartitionTreeNode>();

if (nets.size() < MIN_NETS_TO_PARTITION) {
out->bb = {x1, x2, y1, y2, 0, 0};
out->bb = {x1, x2, y1, y2, 0, layer_max};
out->nets = nets;
/* Build net to ptree node lookup */
for(auto net_id: nets){
Expand Down Expand Up @@ -119,7 +127,7 @@ std::unique_ptr<PartitionTreeNode> PartitionTree::build_helper(const Netlist<>&

/* Couldn't find a cutline: all cutlines result in a one-way cut */
if (std::isnan(best_pos)) {
out->bb = {x1, x2, y1, y2, 0, 0};
out->bb = {x1, x2, y1, y2, 0, layer_max};
out->nets = nets;
/* Build net to ptree node lookup */
for(auto net_id: nets){
Expand Down Expand Up @@ -184,7 +192,6 @@ inline bool net_in_ptree_node(ParentNetId net_id, const PartitionTreeNode* node)
return bb.xmin >= node->bb.xmin && bb.xmax <= node->bb.xmax && bb.ymin >= node->bb.ymin && bb.ymax <= node->bb.ymax;
}

/** These nets had a bounding box update. Find new partition tree nodes for them */
void PartitionTree::update_nets(const std::vector<ParentNetId>& nets) {
for(auto net_id: nets){
PartitionTreeNode* old_ptree_node = _net_to_ptree_node[net_id];
Expand Down
6 changes: 6 additions & 0 deletions vpr/src/route/partition_tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,14 @@ class PartitionTree {
/** Access root. Shouldn't cause a segfault, because PartitionTree constructor always makes a _root */
inline PartitionTreeNode& root(void) { return *_root; }

/** Handle nets which had a bounding box update.
* Bounding boxes can only grow, so we should find a new partition tree node for
* these nets by moving them up until they fit in a node's bounds */
void update_nets(const std::vector<ParentNetId>& nets);

/** Delete all virtual nets in the tree. Used for the net decomposing router.
* Virtual nets are invalidated between iterations due to changing bounding
* boxes. */
void clear_vnets(void);

private:
Expand Down
10 changes: 3 additions & 7 deletions vpr/src/route/sink_sampling.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,20 +116,16 @@ inline std::vector<SinkPoint> quickhull(const std::vector<SinkPoint>& points) {
} // namespace sink_sampling

/** Which side of the cutline is this RRNode on?
* Cutlines are always assumed to be at cutline_axis = (cutline_pos + 0.5).
* In the context of the parallel router, a RR node is considered to be inside a bounding
* box if its drive point is inside it (xlow, ylow if the node doesn't have a direction) */
* Cutlines are always assumed to be at cutline_axis = (cutline_pos + 0.5). */
inline Side which_side(RRNodeId inode, Axis cutline_axis, int cutline_pos) {
auto& device_ctx = g_vpr_ctx.device();
const auto& rr_graph = device_ctx.rr_graph;

Direction dir = rr_graph.node_direction(inode);

if (cutline_axis == Axis::X) {
int x = dir == Direction::DEC ? rr_graph.node_xhigh(inode) : rr_graph.node_xlow(inode);
int x = rr_graph.node_xlow(inode);
return Side(x > cutline_pos); /* 1 is RIGHT */
} else {
int y = dir == Direction::DEC ? rr_graph.node_yhigh(inode) : rr_graph.node_ylow(inode);
int y = rr_graph.node_ylow(inode);
return Side(y > cutline_pos);
}
}
Expand Down
3 changes: 3 additions & 0 deletions vpr/src/route/spatial_route_tree_lookup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ SpatialRouteTreeLookup build_route_tree_spatial_lookup(const Netlist<>& net_list
float bb_area_per_sink = bb_area / fanout;
float bin_area = BIN_AREA_PER_SINK_FACTOR * bb_area_per_sink;

/* Set a minimum bin dimension so that we don't get minuscule bin sizes
* when flat routing is enabled and every LUT input becomes a sink.
* (P.S. This took some time to debug.) */
constexpr float MIN_BIN_DIM = 3;
float bin_dim = std::max(MIN_BIN_DIM, std::ceil(std::sqrt(bin_area)));

Expand Down

0 comments on commit b382e74

Please sign in to comment.