From 7b753e1bc1efd419b61c010577170ea8ccfa97a8 Mon Sep 17 00:00:00 2001 From: Jonathan Johnson Date: Thu, 24 Aug 2023 08:49:11 -0700 Subject: [PATCH] Removing serde's derive feature This set of changes started as an experiment in removing the derive feature from serde after having seen some interesting analysis of build parallelization due to crates usage of the derive macro. In the process of this, the examples started looking ugly due to the split `use` statements for the two crates. I realized one area of API cleanup I hadn't done yet was make more ergonomic serialize/deserialize functions for the SymbolMaps. After I did this, the examples no longer needed the traits, and only needed the derive macros. --- CHANGELOG.md | 16 ++++++++ benchmarks/examples/logs.rs | 11 +++-- pot/Cargo.toml | 5 ++- pot/examples/preshared-symbols.rs | 17 +++----- pot/examples/simple.rs | 3 +- pot/src/de.rs | 22 ++++++++++ pot/src/format.rs | 11 ++--- pot/src/lib.rs | 68 ++++++++++--------------------- pot/src/ser.rs | 21 ++++++++++ 9 files changed, 99 insertions(+), 75 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2e21043..abae30b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 in the atom. - `SymbolMapRef` is now a struct with private contents. - `Error` is now `#[non_exhaustive]`. +- `format::Float` and `format::Integer` no longer implement + `Serialize`/`Deserialize`. These implementations were derived but never + actually used. To improve this crate's build parallelization, a decision was + made to remove these usages of serde's derive macro. Implementing these traits + would be trivial, but the crate maintainer doesn't believe anyone is actually + using these implementations, so they were intentionally skipped. Please file + an issue if this affected you and you would like to see these implementations + added again. ### Changed @@ -44,6 +52,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 chosen at the time due to dependencies also requiring this MSRV. - Tracing instrumentation has been changed from the default level of INFO to TRACE. +- This crate no longer activates the `derive` feature of `serde`. ### Added @@ -55,6 +64,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `de::SymbolMap` and `ser::SymbolMap` now both implement `Serialize` and `Deserialize` using the same serialization strategy. This allows preshared dictionaries to be used, and for state to be saved and restored. +- `de::SymbolMap` and `ser::SymbolMap` have new convenience methods that allow + serializing and deserializing values directly: + + - `de::SymbolMap::deserialize_slice` + - `de::SymbolMap::deserialize_from` + - `ser::SymbolMap::serialize_to_vec` + - `ser::SymbolMap::serialize_to` [9]: https://github.com/khonsulabs/pot/issues/9 diff --git a/benchmarks/examples/logs.rs b/benchmarks/examples/logs.rs index 23516b4d..aac4faa8 100644 --- a/benchmarks/examples/logs.rs +++ b/benchmarks/examples/logs.rs @@ -69,20 +69,19 @@ fn main() -> anyhow::Result<()> { // so let's show the benefits by just encoding a single log entry. let mut sender_state = pot::ser::SymbolMap::default(); let mut receiver_state = pot::de::SymbolMap::default(); - let mut payload_buffer = Vec::new(); - logs.entries[0].serialize(&mut sender_state.serializer_for(&mut payload_buffer)?)?; + let mut payload_buffer = sender_state.serialize_to_vec(&logs.entries[0])?; let first_transmission_length = payload_buffer.len(); { assert_eq!( - &Log::deserialize(&mut receiver_state.deserializer_for_slice(&payload_buffer)?)?, + &receiver_state.deserialize_slice::(&payload_buffer)?, &logs.entries[0] ); } - let mut payload_buffer = Vec::new(); - logs.entries[0].serialize(&mut sender_state.serializer_for(&mut payload_buffer)?)?; + payload_buffer.clear(); + sender_state.serialize_to(&mut payload_buffer, &logs.entries[0])?; let subsequent_transmission_length = payload_buffer.len(); assert_eq!( - &Log::deserialize(&mut receiver_state.deserializer_for_slice(&payload_buffer)?)?, + &receiver_state.deserialize_slice::(&payload_buffer)?, &logs.entries[0] ); diff --git a/pot/Cargo.toml b/pot/Cargo.toml index e9fe633c..fbe05def 100644 --- a/pot/Cargo.toml +++ b/pot/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pot" -version = "2.0.0" +version = "3.0.0" edition = "2021" description = "A concise binary serialization format written for `BonsaiDb`." license = "MIT OR Apache-2.0" @@ -14,12 +14,13 @@ rust-version = "1.70.0" default = [] [dependencies] -serde = { version = "1.0.136", features = ["derive"] } +serde = { version = "1.0.136" } tracing = { version = "0.1.30", optional = true } byteorder = "1.4.3" half = "2.2.1" [dev-dependencies] +serde_derive = "1.0.136" tracing-subscriber = "0.3.8" tracing = "0.1.30" serde_bytes = "0.11.5" diff --git a/pot/examples/preshared-symbols.rs b/pot/examples/preshared-symbols.rs index c9ba71eb..ca1ef32a 100644 --- a/pot/examples/preshared-symbols.rs +++ b/pot/examples/preshared-symbols.rs @@ -1,6 +1,5 @@ // begin rustme snippet: example -use serde::{Deserialize, Serialize}; - +use serde_derive::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Default)] pub struct User { id: u64, @@ -22,10 +21,7 @@ fn main() { name: String::from("ecton"), }; let encoded_without_map = pot::to_vec(&original_user).unwrap(); - let mut encoded_with_map = Vec::new(); - original_user - .serialize(&mut preshared_map.serializer_for(&mut encoded_with_map).unwrap()) - .unwrap(); + let encoded_with_map = preshared_map.serialize_to_vec(&original_user).unwrap(); println!( "Default User encoded without map: {} bytes", encoded_without_map.len() @@ -42,12 +38,9 @@ fn main() { // Deserialize the symbol map. let mut deserializer_map: pot::de::SymbolMap = pot::from_slice(&preshared_map_bytes).unwrap(); // Deserialize the payload using the map. - let user = User::deserialize( - &mut deserializer_map - .deserializer_for_slice(&encoded_with_map) - .unwrap(), - ) - .unwrap(); + let user: User = deserializer_map + .deserialize_slice(&encoded_with_map) + .unwrap(); assert_eq!(user, original_user); } diff --git a/pot/examples/simple.rs b/pot/examples/simple.rs index 6974141f..179b3a94 100644 --- a/pot/examples/simple.rs +++ b/pot/examples/simple.rs @@ -1,6 +1,5 @@ // begin rustme snippet: example -use serde::{Deserialize, Serialize}; - +use serde_derive::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)] pub struct User { id: u64, diff --git a/pot/src/de.rs b/pot/src/de.rs index 572c2652..49d37c46 100644 --- a/pot/src/de.rs +++ b/pot/src/de.rs @@ -1210,6 +1210,28 @@ impl SymbolMap { Deserializer::from_read(reader, self.persistent(), usize::MAX) } + /// Deserializes `T` from `slice`. + /// + /// This should only be used with data generated by using a persistent + /// [`ser::SymbolMap`](crate::ser::SymbolMap). + pub fn deserialize_slice<'de, T>(&mut self, slice: &'de [u8]) -> Result + where + T: Deserialize<'de>, + { + T::deserialize(&mut self.deserializer_for_slice(slice)?) + } + + /// Deserializes `T` from `reader`. + /// + /// This should only be used with data generated by using a persistent + /// [`ser::SymbolMap`](crate::ser::SymbolMap). + pub fn deserialize_from<'de, T>(&mut self, reader: impl Read) -> Result + where + T: Deserialize<'de>, + { + T::deserialize(&mut self.deserializer_for(reader)?) + } + #[must_use] fn persistent<'de>(&mut self) -> SymbolMapRef<'_, 'de> { SymbolMapRef(SymbolMapRefPrivate::Persistent(self)) diff --git a/pot/src/format.rs b/pot/src/format.rs index 84978347..88b41b5e 100644 --- a/pot/src/format.rs +++ b/pot/src/format.rs @@ -2,7 +2,6 @@ use std::fmt::Display; use byteorder::{BigEndian, LittleEndian, ReadBytesExt, WriteBytesExt}; use half::f16; -use serde::{Deserialize, Serialize}; pub(crate) const CURRENT_VERSION: u8 = 0; @@ -546,11 +545,10 @@ pub fn write_bytes(mut writer: W, value: &[u8]) -> std::io::Re } /// An integer type that can safely convert between other number types using compile-time evaluation. -#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)] -#[serde(transparent)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct Integer(pub(crate) InnerInteger); -#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub(crate) enum InnerInteger { /// An i8 value. I8(i8), @@ -1131,11 +1129,10 @@ pub struct Atom<'de> { } /// A floating point number that can safely convert between other number types using compile-time evaluation when possible. -#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq)] -#[serde(transparent)] +#[derive(Debug, Copy, Clone, PartialEq)] pub struct Float(pub(crate) InnerFloat); -#[derive(Debug, Serialize, Deserialize, Copy, Clone)] +#[derive(Debug, Copy, Clone)] pub(crate) enum InnerFloat { /// An f64 value. F64(f64), diff --git a/pot/src/lib.rs b/pot/src/lib.rs index f0f9ed2b..466337da 100644 --- a/pot/src/lib.rs +++ b/pot/src/lib.rs @@ -191,6 +191,7 @@ mod tests { use std::sync::OnceLock; use serde::{Deserializer, Serializer}; + use serde_derive::{Deserialize, Serialize}; use super::*; use crate::format::{Float, Integer, CURRENT_VERSION}; @@ -814,24 +815,17 @@ mod tests { let mut sender = ser::SymbolMap::default(); let mut receiver = de::SymbolList::default(); - let mut bytes = Vec::new(); - NumbersStruct::default() - .serialize(&mut sender.serializer_for(&mut bytes).unwrap()) - .unwrap(); - let _result = - NumbersStruct::deserialize(&mut receiver.deserializer_for_slice(&bytes).unwrap()) - .unwrap(); + let mut bytes = sender.serialize_to_vec(&NumbersStruct::default()).unwrap(); + let _result = receiver.deserialize_slice::(&bytes).unwrap(); let symbol_count_after_first_send = receiver.len(); let first_payload_len = bytes.len(); // Send again, confirm the symbol list didn't grow. bytes.clear(); - NumbersStruct::default() - .serialize(&mut sender.serializer_for(&mut bytes).unwrap()) + sender + .serialize_to(&mut bytes, &NumbersStruct::default()) .unwrap(); - let _result = - NumbersStruct::deserialize(&mut receiver.deserializer_for_slice(&bytes).unwrap()) - .unwrap(); + let _result = receiver.deserialize_slice::(&bytes).unwrap(); assert_eq!(symbol_count_after_first_send, receiver.len()); println!( "First: {first_payload_len} bytes; Second: {} bytes", @@ -845,24 +839,21 @@ mod tests { let mut sender = ser::SymbolMap::default(); let mut receiver = de::SymbolList::default(); - let mut bytes = Vec::new(); - NumbersStruct::default() - .serialize(&mut sender.serializer_for(&mut bytes).unwrap()) + let mut bytes = sender.serialize_to_vec(&NumbersStruct::default()).unwrap(); + let _result = receiver + .deserialize_from::(&bytes[..]) .unwrap(); - let _result = - NumbersStruct::deserialize(&mut receiver.deserializer_for(&bytes[..]).unwrap()) - .unwrap(); let symbol_count_after_first_send = receiver.len(); let first_payload_len = bytes.len(); // Send again, confirm the symbol list didn't grow. bytes.clear(); - NumbersStruct::default() - .serialize(&mut sender.serializer_for(&mut bytes).unwrap()) + sender + .serialize_to(&mut bytes, &NumbersStruct::default()) + .unwrap(); + let _result = receiver + .deserialize_from::(&bytes[..]) .unwrap(); - let _result = - NumbersStruct::deserialize(&mut receiver.deserializer_for(&bytes[..]).unwrap()) - .unwrap(); assert_eq!(symbol_count_after_first_send, receiver.len()); println!( "First: {first_payload_len} bytes; Second: {} bytes", @@ -883,16 +874,13 @@ mod tests { assert!(sender.is_empty()); let mut receiver = crate::de::SymbolMap::new(); assert!(receiver.is_empty()); - let mut bytes = Vec::new(); // Send the first payload, populating the map. - Payload::default() - .serialize(&mut sender.serializer_for(&mut bytes).unwrap()) - .unwrap(); + let mut bytes = sender.serialize_to_vec(&Payload::default()).unwrap(); assert_eq!(sender.len(), 2); assert_eq!( - Payload::deserialize(&mut receiver.deserializer_for_slice(&bytes).unwrap()).unwrap(), + receiver.deserialize_slice::(&bytes).unwrap(), Payload::default() ); assert_eq!(receiver.len(), 2); @@ -913,31 +901,19 @@ mod tests { // by the serialized map and the original map are identical. let new_payload = Payload { a: 1, b: 2 }; bytes.clear(); - new_payload - .serialize(&mut sender.serializer_for(&mut bytes).unwrap()) - .unwrap(); - let mut from_serialized_sender = Vec::new(); - new_payload - .serialize( - &mut deserialized_sender - .serializer_for(&mut from_serialized_sender) - .unwrap(), - ) - .unwrap(); + sender.serialize_to(&mut bytes, &new_payload).unwrap(); + let from_serialized_sender = deserialized_sender.serialize_to_vec(&new_payload).unwrap(); assert_eq!(bytes, from_serialized_sender); // Deserialize the payload assert_eq!( - Payload::deserialize(&mut receiver.deserializer_for_slice(&bytes).unwrap()).unwrap(), + receiver.deserialize_slice::(&bytes).unwrap(), new_payload ); assert_eq!( - Payload::deserialize( - &mut deserialized_receiver - .deserializer_for_slice(&bytes) - .unwrap() - ) - .unwrap(), + deserialized_receiver + .deserialize_slice::(&bytes) + .unwrap(), new_payload ); } diff --git a/pot/src/ser.rs b/pot/src/ser.rs index b11126fd..c0138a56 100644 --- a/pot/src/ser.rs +++ b/pot/src/ser.rs @@ -1,6 +1,7 @@ use std::borrow::Cow; use std::cmp::Ordering; use std::fmt::{Debug, Display}; +use std::io::Write; use std::ops::Range; use std::usize; @@ -587,6 +588,26 @@ impl SymbolMap { Serializer::new_with_symbol_map(output, SymbolMapRef::Persistent(self)) } + /// Serializes `value` into `writer` while persisting symbols into `self`. + pub fn serialize_to(&mut self, writer: W, value: &T) -> Result<()> + where + W: Write, + T: Serialize, + { + value.serialize(&mut self.serializer_for(writer)?) + } + + /// Serializes `value` into a new `Vec` while persisting symbols into + /// `self`. + pub fn serialize_to_vec(&mut self, value: &T) -> Result> + where + T: Serialize, + { + let mut output = Vec::new(); + self.serialize_to(&mut output, value)?; + Ok(output) + } + fn find_or_add(&mut self, symbol: &'static str) -> RegisteredSymbol { // Symbols have to be static strings, and so we can rely on the addres // not changing. To avoid string comparisons, we're going to use the