From bf4570528a14c08083947051b2a131a24148f042 Mon Sep 17 00:00:00 2001 From: Declan Kelly Date: Thu, 4 Jul 2024 00:57:54 -0700 Subject: [PATCH] Fixup the tree stats collector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Description** - Switch the TreeStats from using custom Debug to Display - Instead of visiting the tree and passing back up the output recursively, mutate a single value - Make the `collect` function safe, since its requirements are satisfied by being passed the map reference. **Motivation** - Display is more in line with a custom string representation - Mild performance benefit to mutating a single value - Easier to call the safe function **Testing Done** `cargo test`. Ran hyperfine vs prev commit: ``` thedeck@c889f3b04fb0 blart % hyperfine --warmup 3 '/tmp/tree_stats_1 benches/dict.txt' '/tmp/tree_stats_2 benches/dict.txt' Benchmark 1: /tmp/tree_stats_1 benches/dict.txt Time (mean ± σ): 115.8 ms ± 5.1 ms [User: 103.6 ms, System: 4.6 ms] Range (min … max): 106.7 ms … 126.4 ms 22 runs Benchmark 2: /tmp/tree_stats_2 benches/dict.txt Time (mean ± σ): 107.3 ms ± 4.7 ms [User: 95.0 ms, System: 4.3 ms] Range (min … max): 99.5 ms … 122.7 ms 26 runs Summary /tmp/tree_stats_2 benches/dict.txt ran 1.08 ± 0.07 times faster than /tmp/tree_stats_1 benches/dict.txt ``` --- examples/count_words.rs | 1 + examples/tree_stats.rs | 5 +- src/nodes/visitor/tree_stats.rs | 129 ++++++++++++++------------------ 3 files changed, 59 insertions(+), 76 deletions(-) diff --git a/examples/count_words.rs b/examples/count_words.rs index fb88f533..9bd284f3 100644 --- a/examples/count_words.rs +++ b/examples/count_words.rs @@ -49,6 +49,7 @@ fn main() -> Result<(), Box> { } #[derive(Debug)] +#[allow(dead_code)] // this struct is used for its debug repr struct WordStats<'b> { num_unique: u64, first_word: &'b [u8], diff --git a/examples/tree_stats.rs b/examples/tree_stats.rs index 80044d19..fd209a4f 100644 --- a/examples/tree_stats.rs +++ b/examples/tree_stats.rs @@ -41,10 +41,9 @@ fn main() -> Result<(), Box> { } fn collect_and_output_stats(tree: TreeMap, ()>) -> Result<(), Box> { - // SAFETY: There are no concurrent mutation to the tree node or its children - let stats = unsafe { TreeStatsCollector::collect(&tree).unwrap() }; + let stats = TreeStatsCollector::collect(&tree).unwrap(); - println!("{stats:#?}"); + println!("{stats}"); let overhead_bytes_per_key_byte = (stats.tree.mem_usage as f64) / (stats.leaf.sum_key_bytes as f64); diff --git a/src/nodes/visitor/tree_stats.rs b/src/nodes/visitor/tree_stats.rs index e30bf444..3ead0264 100644 --- a/src/nodes/visitor/tree_stats.rs +++ b/src/nodes/visitor/tree_stats.rs @@ -8,29 +8,31 @@ use crate::{ /// A visitor of the radix tree which collects statistics about the tree, like /// how many inner nodes of each type, how many leaves #[derive(Debug)] -pub struct TreeStatsCollector; +pub struct TreeStatsCollector { + current: TreeStats, +} impl TreeStatsCollector { /// Run the tree stats collection on the given root node, then return the - /// accumalated stats. - /// - /// # Safety - /// - For the duration of this function, the given node and all its - /// children nodes must not get mutated. - pub unsafe fn collect( + /// accumulated stats. + pub fn collect( tree: &TreeMap, ) -> Option { - let mut collector = TreeStatsCollector; + if let Some(root) = tree.root { + let mut collector = TreeStatsCollector { + current: TreeStats::default(), + }; + + root.visit_with(&mut collector); - tree.root.map(|root| root.visit_with(&mut collector)) + Some(collector.current) + } else { + None + } } /// Iterate through the given tree and return the number of leaf nodes. - /// - /// # Safety - /// - For the duration of this function, the given node and all its - /// children nodes must not get mutated. - pub unsafe fn count_leaf_nodes( + pub fn count_leaf_nodes( tree: &TreeMap, ) -> usize { struct LeafNodeCounter; @@ -186,7 +188,7 @@ impl Add for LeafStats { } /// Collection of stats about the number of nodes types present in a tree -#[derive(Clone, Copy, PartialEq, Eq, Hash, Default)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] pub struct TreeStats { /// Stats for [`InnerNode4`][crate::nodes::InnerNode4]s pub node4: InnerNodeStats, @@ -229,61 +231,48 @@ impl Visitor for TreeStatsColle where K: AsBytes, { - type Output = TreeStats; + type Output = (); fn default_output(&self) -> Self::Output { - TreeStats::default() + () } - fn combine_output(&self, o1: Self::Output, o2: Self::Output) -> Self::Output { - TreeStats { - node4: o1.node4 + o2.node4, - node16: o1.node16 + o2.node16, - node48: o1.node48 + o2.node48, - node256: o1.node256 + o2.node256, - tree: o1.tree + o2.tree, - leaf: o1.leaf + o2.leaf, - } + fn combine_output(&self, _: Self::Output, _: Self::Output) -> Self::Output { + () } fn visit_node4(&mut self, t: &InnerNode4) -> Self::Output { - let mut output = t.super_visit_with(self); - output.node4.aggregate_data(t); - output.tree.aggregate_data(t); - output + t.super_visit_with(self); + self.current.node4.aggregate_data(t); + self.current.tree.aggregate_data(t); } fn visit_node16(&mut self, t: &InnerNode16) -> Self::Output { - let mut output = t.super_visit_with(self); - output.node16.aggregate_data(t); - output.tree.aggregate_data(t); - output + t.super_visit_with(self); + self.current.node16.aggregate_data(t); + self.current.tree.aggregate_data(t); } fn visit_node48(&mut self, t: &InnerNode48) -> Self::Output { - let mut output = t.super_visit_with(self); - output.node48.aggregate_data(t); - output.tree.aggregate_data(t); - output + t.super_visit_with(self); + self.current.node48.aggregate_data(t); + self.current.tree.aggregate_data(t); } fn visit_node256(&mut self, t: &InnerNode256) -> Self::Output { - let mut output = t.super_visit_with(self); - output.node256.aggregate_data(t); - output.tree.aggregate_data(t); - output + t.super_visit_with(self); + self.current.node256.aggregate_data(t); + self.current.tree.aggregate_data(t); } fn visit_leaf(&mut self, t: &LeafNode) -> Self::Output { - let mut output = TreeStats::default(); - output.leaf.count += 1; - output.leaf.sum_key_bytes += t.key_ref().as_bytes().len(); - output.leaf.mem_usage += std::mem::size_of_val(t); - output + self.current.leaf.count += 1; + self.current.leaf.sum_key_bytes += t.key_ref().as_bytes().len(); + self.current.leaf.mem_usage += std::mem::size_of_val(t); } } -impl std::fmt::Debug for TreeStats { +impl std::fmt::Display for TreeStats { #[rustfmt::skip] fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let TreeStats { @@ -292,30 +281,24 @@ impl std::fmt::Debug for TreeStats { node48, node256, tree, - leaf, + .. } = self; - f.debug_struct("TreeStats") - .field("node4", &node4) - .field("node16", &node16) - .field("node48", &node48) - .field("node256", &node256) - .field("tree", &tree) - .field("leaf", &leaf) - .finish() - .and(f.write_str("\n")) - .and(f.write_fmt(format_args!("memory usage (inner nodes): {} bytes\n", tree.mem_usage))) - .and(f.write_fmt(format_args!("memory usage (inner nodes + leaf): {} bytes\n", self.total_memory_usage()))) - .and(f.write_fmt(format_args!("bytes/entry: {:.5}\n", self.bytes_per_entry()))) - .and(f.write_fmt(format_args!("bytes/entry (with leaf): {:.5}\n", self.bytes_per_entry_with_leaf()))) - .and(f.write_fmt(format_args!("avg prefix length: {:.5} bytes\n", tree.avg_prefix_len()))) - .and(f.write_fmt(format_args!("avg capped prefix length: {:.5} bytes\n", tree.avg_capped_prefix_len()))) - .and(f.write_fmt(format_args!("% used header bytes (0-1): {:.5}\n", tree.percentage_header_bytes()))) - .and(f.write_fmt(format_args!("% used slots (0-1): {:.5}\n", tree.percentage_slots()))) - .and(f.write_fmt(format_args!("n4 size: {:?} bytes\n", node4.node_size()))) - .and(f.write_fmt(format_args!("n16 size: {:?} bytes\n", node16.node_size()))) - .and(f.write_fmt(format_args!("n48 size: {:?} bytes\n", node48.node_size()))) - .and(f.write_fmt(format_args!("n256 size: {:?} bytes\n", node256.node_size()))) - .and(f.write_fmt(format_args!("max prefix length: {} bytes", tree.max_prefix_len_bytes))) + write!(f, "{:#?}", self)?; + f.write_str("\n")?; + f.write_fmt(format_args!("memory usage (inner nodes): {} bytes\n", tree.mem_usage))?; + f.write_fmt(format_args!("memory usage (inner nodes + leaf): {} bytes\n", self.total_memory_usage()))?; + f.write_fmt(format_args!("bytes/entry: {:.5}\n", self.bytes_per_entry()))?; + f.write_fmt(format_args!("bytes/entry (with leaf): {:.5}\n", self.bytes_per_entry_with_leaf()))?; + f.write_fmt(format_args!("avg prefix length: {:.5} bytes\n", tree.avg_prefix_len()))?; + f.write_fmt(format_args!("avg capped prefix length: {:.5} bytes\n", tree.avg_capped_prefix_len()))?; + f.write_fmt(format_args!("% used header bytes (0-1): {:.5}\n", tree.percentage_header_bytes()))?; + f.write_fmt(format_args!("% used slots (0-1): {:.5}\n", tree.percentage_slots()))?; + f.write_fmt(format_args!("n4 size: {:?} bytes\n", node4.node_size()))?; + f.write_fmt(format_args!("n16 size: {:?} bytes\n", node16.node_size()))?; + f.write_fmt(format_args!("n48 size: {:?} bytes\n", node48.node_size()))?; + f.write_fmt(format_args!("n256 size: {:?} bytes\n", node256.node_size()))?; + f.write_fmt(format_args!("max prefix length: {} bytes", tree.max_prefix_len_bytes))?; + Ok(()) } } @@ -333,7 +316,7 @@ mod tests { { tree.try_insert(k, v).unwrap(); } - let stats = unsafe { TreeStatsCollector::collect(&tree).unwrap() }; + let stats = TreeStatsCollector::collect(&tree).unwrap(); let expected_inner = InnerNodeStats { count: 15, @@ -368,7 +351,7 @@ mod tests { { tree.try_insert(k, v).unwrap(); } - let stats = unsafe { TreeStatsCollector::collect(&tree).unwrap() }; + let stats = TreeStatsCollector::collect(&tree).unwrap(); let node4 = InnerNodeStats { count: 16,