From b382e74dccebc824b68735345f79a0c200def605 Mon Sep 17 00:00:00 2001 From: Duck Deux Date: Thu, 14 Nov 2024 12:35:15 -0500 Subject: [PATCH] address review comments --- vpr/src/route/DecompNetlistRouter.h | 14 ++++++++---- vpr/src/route/DecompNetlistRouter.tpp | 24 +++++++++++---------- vpr/src/route/ParallelNetlistRouter.h | 5 +++-- vpr/src/route/ParallelNetlistRouter.tpp | 18 ++++++++-------- vpr/src/route/SerialNetlistRouter.tpp | 9 ++++---- vpr/src/route/connection_router.cpp | 12 +++++------ vpr/src/route/netlist_routers.h | 6 ++++-- vpr/src/route/partition_tree.cpp | 13 ++++++++--- vpr/src/route/partition_tree.h | 6 ++++++ vpr/src/route/sink_sampling.h | 10 +++------ vpr/src/route/spatial_route_tree_lookup.cpp | 3 +++ 11 files changed, 71 insertions(+), 49 deletions(-) diff --git a/vpr/src/route/DecompNetlistRouter.h b/vpr/src/route/DecompNetlistRouter.h index 47595e5aeb1..a41d656c240 100644 --- a/vpr/src/route/DecompNetlistRouter.h +++ b/vpr/src/route/DecompNetlistRouter.h @@ -2,7 +2,8 @@ /** @file Parallel and net-decomposing case for NetlistRouter. Works like * \see ParallelNetlistRouter, but tries to "decompose" nets and assign them to - * the next level of the partition tree where possible. */ + * the next level of the partition tree where possible. + * See "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */ #include "netlist_routers.h" #include @@ -57,6 +58,7 @@ class DecompNetlistRouter : public NetlistRouter { * \ref route_net for each net, which will handle other global updates. * \return RouteIterResults for this iteration. */ RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack); + /** Inform the PartitionTree of the nets with updated bounding boxes */ void handle_bb_updated_nets(const std::vector& nets); /** Set RCV enable flag for all routers managed by this netlist router. * Net decomposition does not work with RCV, so calling this fn with x=true is a fatal error. 
*/ @@ -66,9 +68,13 @@ class DecompNetlistRouter : public NetlistRouter { private: /** Should we decompose this net? */ bool should_decompose_net(ParentNetId net_id, const PartitionTreeNode& node); - /** Get a bitset with sinks to route before net decomposition */ + /** Get a bitset of sinks to route before net decomposition. Output bitset is + * [1..num_sinks] where the corresponding index is set to 1 if the sink needs to + * be routed */ vtr::dynamic_bitset<> get_decomposition_mask(ParentNetId net_id, const PartitionTreeNode& node); - /** Get a bitset with sinks to route before virtual net decomposition */ + /** Get a bitset of sinks to route before virtual net decomposition. Output bitset is + * [1..num_sinks] where the corresponding index is set to 1 if the sink needs to + * be routed */ vtr::dynamic_bitset<> get_decomposition_mask_vnet(const VirtualNet& vnet, const PartitionTreeNode& node); /** Decompose and route a regular net. Output the resulting vnets to \p left and \p right. * \return Success status: true if routing is successful and left and right now contain valid virtual nets: false otherwise. */ @@ -116,7 +122,7 @@ class DecompNetlistRouter : public NetlistRouter { float _pres_fac; float _worst_neg_slack; - /** The partition tree */ + /** The partition tree. Holds the groups of nets for each partition */ vtr::optional _tree; /** Sinks to be always sampled for decomposition for each net: [0.._net_list.size()-1] diff --git a/vpr/src/route/DecompNetlistRouter.tpp b/vpr/src/route/DecompNetlistRouter.tpp index aadbcdaf2cf..a009132c45d 100644 --- a/vpr/src/route/DecompNetlistRouter.tpp +++ b/vpr/src/route/DecompNetlistRouter.tpp @@ -22,23 +22,25 @@ inline RouteIterResults DecompNetlistRouter::route_netlist(int itry, f _pres_fac = pres_fac; _worst_neg_slack = worst_neg_slack; - vtr::Timer t; + vtr::Timer timer; /* Organize netlist into a PartitionTree. 
* Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */ if(!_tree){ _tree = PartitionTree(_net_list); - PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(t.elapsed_sec()) + " s"); + PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s"); } - /* Remove all virtual nets: we will create them for each iteration */ + /* Remove all virtual nets: we will create them for each iteration. + * This needs to be done because the partition tree can change between iterations + * due to bounding box updates, which invalidates virtual nets */ _tree->clear_vnets(); /* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. */ - tbb::task_group g; - route_partition_tree_node(g, _tree->root()); - g.wait(); - PartitionTreeDebug::log("Routing all nets took " + std::to_string(t.elapsed_sec()) + " s"); + tbb::task_group group; + route_partition_tree_node(group, _tree->root()); + group.wait(); + PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s"); /* Combine results from threads */ RouteIterResults out; @@ -52,7 +54,6 @@ inline RouteIterResults DecompNetlistRouter::route_netlist(int itry, f return out; } -/* TODO: Handle this in route_netlist */ template void DecompNetlistRouter::handle_bb_updated_nets(const std::vector& nets) { VTR_ASSERT(_tree); @@ -139,8 +140,9 @@ inline bool should_decompose_vnet(const VirtualNet& vnet, const PartitionTreeNod template void DecompNetlistRouter::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) { auto& route_ctx = g_vpr_ctx.mutable_routing(); - vtr::Timer t; + vtr::Timer timer; + /* node.nets is an unordered set, copy into vector to sort */ std::vector nets(node.nets.begin(), node.nets.end()); /* Sort so that 
nets with the most sinks are routed first. @@ -256,7 +258,7 @@ void DecompNetlistRouter::route_partition_tree_node(tbb::task_group& g PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + " nets and " + std::to_string(node.vnets.size()) - + " virtual nets routed in " + std::to_string(t.elapsed_sec()) + + " virtual nets routed in " + std::to_string(timer.elapsed_sec()) + " s"); /* This node is finished: add left & right branches to the task queue */ @@ -674,7 +676,7 @@ vtr::dynamic_bitset<> DecompNetlistRouter::get_decomposition_mask_vnet * sinks in the small side and unblock. Add convex hull since we are in a vnet which * may not have a source at all */ if (inside_bb(tree.root().inode, vnet.clipped_bb)) { /* We have source, no need to sample after reduction in most cases */ - bool is_reduced = get_reduction_mask_vnet_with_source(vnet, node.cutline_axis, node.cutline_pos, out); + bool is_reduced = get_reduction_mask_vnet_with_source(vnet, node.cutline_axis, node.cutline_pos, out); bool source_on_cutline = is_close_to_cutline(tree.root().inode, node.cutline_axis, node.cutline_pos, 1); if (!is_reduced || source_on_cutline){ convex_hull_downsample(vnet.net_id, vnet.clipped_bb, out); diff --git a/vpr/src/route/ParallelNetlistRouter.h b/vpr/src/route/ParallelNetlistRouter.h index eaa9a1bfff4..e77fdf8344e 100644 --- a/vpr/src/route/ParallelNetlistRouter.h +++ b/vpr/src/route/ParallelNetlistRouter.h @@ -8,7 +8,7 @@ * * Note that the parallel router does not support graphical router breakpoints. * - * [0]: F. Koşar, "A net-decomposing parallel FPGA router", MS thesis, UofT ECE, 2023 */ + * [0]: "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */ #include "netlist_routers.h" #include "vtr_optional.h" @@ -53,6 +53,7 @@ class ParallelNetlistRouter : public NetlistRouter { * \ref route_net for each net, which will handle other global updates. * \return RouteIterResults for this iteration. 
*/ RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack); + /** Inform the PartitionTree of the nets with updated bounding boxes */ void handle_bb_updated_nets(const std::vector& nets); void set_rcv_enabled(bool x); void set_timing_info(std::shared_ptr timing_info); @@ -98,7 +99,7 @@ class ParallelNetlistRouter : public NetlistRouter { float _pres_fac; float _worst_neg_slack; - /** The partition tree */ + /** The partition tree. Holds the groups of nets for each partition */ vtr::optional _tree; }; diff --git a/vpr/src/route/ParallelNetlistRouter.tpp b/vpr/src/route/ParallelNetlistRouter.tpp index 72a9a9ced95..1268ed6030e 100644 --- a/vpr/src/route/ParallelNetlistRouter.tpp +++ b/vpr/src/route/ParallelNetlistRouter.tpp @@ -21,17 +21,17 @@ inline RouteIterResults ParallelNetlistRouter::route_netlist(int itry, /* Organize netlist into a PartitionTree. * Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */ - vtr::Timer t; + vtr::Timer timer; if(!_tree){ _tree = PartitionTree(_net_list); - PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(t.elapsed_sec()) + " s"); + PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s"); } /* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. 
*/ - tbb::task_group g; - route_partition_tree_node(g, _tree->root()); - g.wait(); - PartitionTreeDebug::log("Routing all nets took " + std::to_string(t.elapsed_sec()) + " s"); + tbb::task_group group; + route_partition_tree_node(group, _tree->root()); + group.wait(); + PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s"); /* Combine results from threads */ RouteIterResults out; @@ -48,6 +48,7 @@ template void ParallelNetlistRouter::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) { auto& route_ctx = g_vpr_ctx.mutable_routing(); + /* node.nets is an unordered set, copy into vector to sort */ std::vector nets(node.nets.begin(), node.nets.end()); /* Sort so net with most sinks is routed first. */ @@ -55,7 +56,7 @@ void ParallelNetlistRouter::route_partition_tree_node(tbb::task_group& return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size(); }); - vtr::Timer t; + vtr::Timer timer; for (auto net_id : nets) { auto flags = route_net( _routers_th.local(), @@ -95,7 +96,7 @@ void ParallelNetlistRouter::route_partition_tree_node(tbb::task_group& PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + " nets and " + std::to_string(node.vnets.size()) - + " virtual nets routed in " + std::to_string(t.elapsed_sec()) + + " virtual nets routed in " + std::to_string(timer.elapsed_sec()) + " s"); /* This node is finished: add left & right branches to the task queue */ @@ -111,7 +112,6 @@ void ParallelNetlistRouter::route_partition_tree_node(tbb::task_group& } } -/* TODO: Handle this in route_netlist */ template void ParallelNetlistRouter::handle_bb_updated_nets(const std::vector& nets) { VTR_ASSERT(_tree); diff --git a/vpr/src/route/SerialNetlistRouter.tpp b/vpr/src/route/SerialNetlistRouter.tpp index cb7c85a55ca..63497d7d394 100644 --- a/vpr/src/route/SerialNetlistRouter.tpp +++ b/vpr/src/route/SerialNetlistRouter.tpp @@ -11,7 +11,7 @@ inline RouteIterResults 
SerialNetlistRouter::route_netlist(int itry, f auto& route_ctx = g_vpr_ctx.mutable_routing(); RouteIterResults out; - vtr::Timer t; + vtr::Timer timer; /* Sort so net with most sinks is routed first */ auto sorted_nets = std::vector(_net_list.nets().begin(), _net_list.nets().end()); @@ -48,7 +48,9 @@ inline RouteIterResults SerialNetlistRouter::route_netlist(int itry, f } if (flags.retry_with_full_bb) { - /* Grow the BB and retry this net right away. We don't populate out.bb_updated_nets */ + /* Grow the BB and retry this net right away. + * We don't populate out.bb_updated_nets for the serial router, since + * there is no partition tree to update. */ route_ctx.route_bb[net_id] = full_device_bb(); inet--; continue; @@ -62,11 +64,10 @@ inline RouteIterResults SerialNetlistRouter::route_netlist(int itry, f } } - PartitionTreeDebug::log("Routing all nets took " + std::to_string(t.elapsed_sec()) + " s"); + PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s"); return out; } -/* TODO: Handle this in route_netlist */ template void SerialNetlistRouter::handle_bb_updated_nets(const std::vector& /* nets */) { } diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp index 2fed572d5d0..916ed342178 100644 --- a/vpr/src/route/connection_router.cpp +++ b/vpr/src/route/connection_router.cpp @@ -4,6 +4,8 @@ #include "rr_graph.h" #include "rr_graph_fwd.h" +/** Used for the flat router. The node isn't relevant to the target if + * it is an intra-block node outside of our target block */ static bool relevant_node_to_target(const RRGraphView* rr_graph, RRNodeId node_to_add, RRNodeId target_node); @@ -997,12 +999,7 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( continue; RRNodeId rr_node_to_add = rt_node.inode; - bool is_inside_bb = inside_bb(rr_node_to_add, net_bounding_box); - - if(!is_inside_bb) - continue; - - /* TODO: Why are we doing this? 
*/ + /* Flat router: don't go into clusters other than the target one */ if (is_flat_) { if (!relevant_node_to_target(rr_graph_, rr_node_to_add, target_node)) continue; @@ -1041,7 +1038,8 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( } if (done) break; } - //If the target bin, and it's surrounding bins were empty, just add the full route tree + /* If we didn't find enough nodes to branch off near the target + * or they are on the wrong grid layer, just add the full route tree */ if (chan_nodes_added <= SINGLE_BIN_MIN_NODES || !found_node_on_same_layer) { add_route_tree_to_heap(rt_root, target_node, cost_params, net_bounding_box); return net_bounding_box; diff --git a/vpr/src/route/netlist_routers.h b/vpr/src/route/netlist_routers.h index 55337d21688..1524c2ddb38 100644 --- a/vpr/src/route/netlist_routers.h +++ b/vpr/src/route/netlist_routers.h @@ -38,7 +38,8 @@ struct RouteIterResults { bool is_routable = true; /** Net IDs with changed routing */ std::vector rerouted_nets; - /** Net IDs with changed bounding box */ + /** Net IDs with changed bounding box for this iteration. + * Used by the parallel router to update the \ref PartitionTree */ std::vector bb_updated_nets; /** RouterStats for this iteration */ RouterStats stats; @@ -56,7 +57,8 @@ class NetlistRouter { * \return RouteIterResults for this iteration. */ virtual RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack) = 0; - /** Handle net bounding box updates. No-op for the serial router */ + /** Handle net bounding box updates by passing them to the PartitionTree. 
+ * No-op for the serial router */ virtual void handle_bb_updated_nets(const std::vector& nets) = 0; /** Enable RCV for each of the ConnectionRouters this NetlistRouter manages.*/ diff --git a/vpr/src/route/partition_tree.cpp b/vpr/src/route/partition_tree.cpp index e721a386933..ac95a9a5285 100644 --- a/vpr/src/route/partition_tree.cpp +++ b/vpr/src/route/partition_tree.cpp @@ -15,15 +15,23 @@ PartitionTree::PartitionTree(const Netlist<>& netlist) { _root = build_helper(netlist, all_nets, 0, 0, device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); } +/** Build a branch of the PartitionTree given a set of \p nets and a bounding box. + * Calls itself recursively with smaller and smaller bounding boxes until there are fewer + * nets than \ref MIN_NETS_TO_PARTITION. */ std::unique_ptr PartitionTree::build_helper(const Netlist<>& netlist, const std::unordered_set& nets, int x1, int y1, int x2, int y2) { if (nets.empty()) return nullptr; const auto& route_ctx = g_vpr_ctx.routing(); + + /* Only build this for 2 dimensions. 
Ignore the layers for now */ + const auto& device_ctx = g_vpr_ctx.device(); + int layer_max = device_ctx.grid.get_num_layers() - 1; + auto out = std::make_unique(); if (nets.size() < MIN_NETS_TO_PARTITION) { - out->bb = {x1, x2, y1, y2, 0, 0}; + out->bb = {x1, x2, y1, y2, 0, layer_max}; out->nets = nets; /* Build net to ptree node lookup */ for(auto net_id: nets){ @@ -119,7 +127,7 @@ std::unique_ptr PartitionTree::build_helper(const Netlist<>& /* Couldn't find a cutline: all cutlines result in a one-way cut */ if (std::isnan(best_pos)) { - out->bb = {x1, x2, y1, y2, 0, 0}; + out->bb = {x1, x2, y1, y2, 0, layer_max}; out->nets = nets; /* Build net to ptree node lookup */ for(auto net_id: nets){ @@ -184,7 +192,6 @@ inline bool net_in_ptree_node(ParentNetId net_id, const PartitionTreeNode* node) return bb.xmin >= node->bb.xmin && bb.xmax <= node->bb.xmax && bb.ymin >= node->bb.ymin && bb.ymax <= node->bb.ymax; } -/** These nets had a bounding box update. Find new partition tree nodes for them */ void PartitionTree::update_nets(const std::vector& nets) { for(auto net_id: nets){ PartitionTreeNode* old_ptree_node = _net_to_ptree_node[net_id]; diff --git a/vpr/src/route/partition_tree.h b/vpr/src/route/partition_tree.h index 2a81620607c..82b75976b83 100644 --- a/vpr/src/route/partition_tree.h +++ b/vpr/src/route/partition_tree.h @@ -86,8 +86,14 @@ class PartitionTree { /** Access root. Shouldn't cause a segfault, because PartitionTree constructor always makes a _root */ inline PartitionTreeNode& root(void) { return *_root; } + /** Handle nets which had a bounding box update. + * Bounding boxes can only grow, so we should find a new partition tree node for + * these nets by moving them up until they fit in a node's bounds */ void update_nets(const std::vector& nets); + /** Delete all virtual nets in the tree. Used for the net decomposing router. + * Virtual nets are invalidated between iterations due to changing bounding + * boxes. 
*/ void clear_vnets(void); private: diff --git a/vpr/src/route/sink_sampling.h b/vpr/src/route/sink_sampling.h index 6dfef37a5e0..81437805e86 100644 --- a/vpr/src/route/sink_sampling.h +++ b/vpr/src/route/sink_sampling.h @@ -116,20 +116,16 @@ inline std::vector quickhull(const std::vector& points) { } // namespace sink_sampling /** Which side of the cutline is this RRNode on? - * Cutlines are always assumed to be at cutline_axis = (cutline_pos + 0.5). - * In the context of the parallel router, a RR node is considered to be inside a bounding - * box if its drive point is inside it (xlow, ylow if the node doesn't have a direction) */ + * Cutlines are always assumed to be at cutline_axis = (cutline_pos + 0.5). */ inline Side which_side(RRNodeId inode, Axis cutline_axis, int cutline_pos) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - Direction dir = rr_graph.node_direction(inode); - if (cutline_axis == Axis::X) { - int x = dir == Direction::DEC ? rr_graph.node_xhigh(inode) : rr_graph.node_xlow(inode); + int x = rr_graph.node_xlow(inode); return Side(x > cutline_pos); /* 1 is RIGHT */ } else { - int y = dir == Direction::DEC ? rr_graph.node_yhigh(inode) : rr_graph.node_ylow(inode); + int y = rr_graph.node_ylow(inode); return Side(y > cutline_pos); } } diff --git a/vpr/src/route/spatial_route_tree_lookup.cpp b/vpr/src/route/spatial_route_tree_lookup.cpp index 45bfd130810..ddbb066a188 100644 --- a/vpr/src/route/spatial_route_tree_lookup.cpp +++ b/vpr/src/route/spatial_route_tree_lookup.cpp @@ -17,6 +17,9 @@ SpatialRouteTreeLookup build_route_tree_spatial_lookup(const Netlist<>& net_list float bb_area_per_sink = bb_area / fanout; float bin_area = BIN_AREA_PER_SINK_FACTOR * bb_area_per_sink; + /* Set a minimum bin dimension so that we don't get minuscule bin sizes + * when flat routing is enabled and every LUT input becomes a sink. + * (P.S. This took some time to debug.) 
*/ constexpr float MIN_BIN_DIM = 3; float bin_dim = std::max(MIN_BIN_DIM, std::ceil(std::sqrt(bin_area)));