diff --git a/CHANGELOG.md b/CHANGELOG.md index acb4d697..c406083a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,31 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased + +### Added + +- `Compatibility` is a new enum that controls compatibility of serialization and + deserialization. `Compatibility::Full` is the default compatibility level in + `v3.x`, and it serializes data in a way that even Pot `v1.0` deserializers can + deserialize. + + `Compatibility::V4` is a new serialization format that serializes enum + variants without associated data in a way that allows `Value` deserialization + to be done unambiguously. See [#11][11] for an example of this issue. This bug + only affected `deserialize_any`-type deserialization. Typical deserialization + works correctly. + + Compatibility can be configured using these new APIs: + + - `Config::compatibility` + - `Serializer::new_with_compatibility` + - `SymbolMap::with_compatibility` + - `SymbolMap::set_compatibility` + +[11]: https://github.com/khonsulabs/pot/issues/11 + + ## 3.0.0 ### Breaking Changes diff --git a/pot/src/de.rs b/pot/src/de.rs index 49d37c46..7a0fb2e7 100644 --- a/pot/src/de.rs +++ b/pot/src/de.rs @@ -95,7 +95,7 @@ impl<'s, 'de, R: Reader<'de>> Deserializer<'s, 'de, R> { fn read_header(&mut self) -> Result<()> { let version = format::read_header(&mut self.input)?; - if version == CURRENT_VERSION { + if version <= CURRENT_VERSION { Ok(()) } else { Err(Error::IncompatibleVersion) @@ -996,15 +996,21 @@ impl<'a, 's, 'de, R: Reader<'de>> EnumAccess<'de> for &'a mut Deserializer<'s, ' V: DeserializeSeed<'de>, { // Have the seed deserialize the next atom, which should be the symbol. 
- let atom = self.read_atom()?; - if atom.kind == Kind::Special && matches!(atom.nucleus, Some(Nucleus::Named)) { - let val = seed.deserialize(&mut *self)?; - Ok((val, self)) - } else { - Err(Error::custom(format!( + let atom = self.peek_atom()?; + match atom.kind { + Kind::Special if matches!(atom.nucleus, Some(Nucleus::Named)) => { + self.read_atom()?; + let val = seed.deserialize(&mut *self)?; + Ok((val, self)) + } + Kind::Symbol => { + let val = seed.deserialize(&mut *self)?; + Ok((val, self)) + } + _ => Err(Error::custom(format!( "expected Named, got {:?}", atom.kind - ))) + ))), } } } diff --git a/pot/src/format.rs b/pot/src/format.rs index 6f8de02d..4bebbd44 100644 --- a/pot/src/format.rs +++ b/pot/src/format.rs @@ -3,7 +3,9 @@ use std::fmt::Display; use byteorder::{BigEndian, LittleEndian, ReadBytesExt, WriteBytesExt}; use half::f16; -pub(crate) const CURRENT_VERSION: u8 = 0; +pub(crate) const INITIAL_VERSION: u8 = 0; +pub(crate) const V4_VERSION: u8 = 1; +pub(crate) const CURRENT_VERSION: u8 = V4_VERSION; use crate::reader::{BufferedBytes, Reader}; use crate::Error; diff --git a/pot/src/lib.rs b/pot/src/lib.rs index 466337da..c2a2cff3 100644 --- a/pot/src/lib.rs +++ b/pot/src/lib.rs @@ -108,18 +108,24 @@ where #[derive(Clone, Debug)] pub struct Config { allocation_budget: usize, + compatibility: Compatibility, } impl Default for Config { #[inline] fn default() -> Self { - Self { - allocation_budget: usize::MAX, - } + Self::new() } } impl Config { + /// Returns the default configuration. + pub const fn new() -> Self { + Self { + allocation_budget: usize::MAX, + compatibility: Compatibility::const_default(), + } + } /// Sets the maximum number of bytes able to be allocated. This is not /// guaranteed to be perfectly accurate, due to the limitations of serde /// deserializers. Pot can keep track of how many bytes it thinks its @@ -133,6 +139,12 @@ impl Config { self } + /// Sets the compatibility mode for serializing and returns self. 
+ pub const fn compatibility(mut self, compatibility: Compatibility) -> Self { + self.compatibility = compatibility; + self + } + /// Deserializes a value from a slice using the configured options. #[inline] pub fn deserialize<'de, T>(&self, serialized: &'de [u8]) -> Result @@ -179,779 +191,45 @@ impl Config { T: Serialize, W: WriteBytesExt, { - let mut serializer = ser::Serializer::new(writer)?; + let mut serializer = ser::Serializer::new_with_compatibility(writer, self.compatibility)?; value.serialize(&mut serializer) } } -#[cfg(test)] -mod tests { - use std::borrow::Cow; - use std::marker::PhantomData; - use std::sync::OnceLock; - - use serde::{Deserializer, Serializer}; - use serde_derive::{Deserialize, Serialize}; - - use super::*; - use crate::format::{Float, Integer, CURRENT_VERSION}; - use crate::value::Value; - - fn init_tracing() { - static INITIALIZED: OnceLock<()> = OnceLock::new(); - - INITIALIZED.get_or_init(|| { - #[cfg(not(feature = "tracing"))] - println!("To see additional logs, run tests with the `tracing` feature enabled"); - - tracing_subscriber::fmt() - .pretty() - // Enable everything. - .with_max_level(tracing::Level::TRACE) - .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ENTER) - // Set this to be the default, global collector for this application. 
- .init(); - }); - } - - fn test_serialization Deserialize<'de> + PartialEq + Debug>( - value: &S, - check_length: Option, - ) { - test_serialization_with(value, check_length, |value, deserialized| { - assert_eq!(value, deserialized); - }); - } - - fn test_serialization_with< - S: Serialize + for<'de> Deserialize<'de> + PartialEq + Debug, - F: FnMut(&S, &S), - >( - value: &S, - check_length: Option, - mut callback: F, - ) { - init_tracing(); - let bytes = to_vec(&value).unwrap(); - println!("{value:?}: {bytes:02x?}"); - let deserialized = from_slice::(&bytes).unwrap(); - callback(value, &deserialized); - if let Some(check_length) = check_length { - // Subtract 4 bytes from the serialized output to account for the header. - assert_eq!(bytes.len() - 4, check_length); - } - - // Do the same, but using the reader interface. - let mut bytes = Vec::new(); - to_writer(value, &mut bytes).unwrap(); - println!("{value:?}: {bytes:02x?}"); - let deserialized = from_reader(&bytes[..]).unwrap(); - callback(value, &deserialized); - } - - use std::fmt::Debug; - - #[derive(Serialize, PartialEq, Deserialize, Debug, Default)] - struct NumbersStruct { - u8: u8, - u16: u16, - char: char, - u32: u32, - u64: u64, - u128: u128, - i8: i8, - i16: i16, - i32: i32, - i64: i64, - i128: i128, - f32: f32, - f64: f64, - } - - #[derive(Serialize, PartialEq, Deserialize, Debug)] - enum EnumVariants { - Unit, - Tuple(u64), - TupleTwoArgs(u64, u64), - Struct { arg: u64 }, - } - - #[test] - fn numbers() { - test_serialization(&NumbersStruct::default(), None); - test_serialization( - &NumbersStruct { - u8: u8::MAX, - u16: u16::MAX, - char: char::MAX, - u32: u32::MAX, - u64: u64::MAX, - u128: u128::MAX, - i8: i8::MIN, - i16: i16::MIN, - i32: i32::MIN, - i64: i64::MIN, - i128: i128::MIN, - f32: 1., - f64: 1., - }, - None, - ); - } - - #[test] - fn number_packing() { - test_serialization(&0_u128, Some(2)); - test_serialization(&(2_u128.pow(8) - 1), Some(2)); - test_serialization(&2_u128.pow(8), Some(3)); 
- test_serialization(&(2_u128.pow(16) - 1), Some(3)); - test_serialization(&2_u128.pow(16), Some(4)); - test_serialization(&(2_u128.pow(24) - 1), Some(4)); - test_serialization(&2_u128.pow(24), Some(5)); - test_serialization(&(2_u128.pow(32) - 1), Some(5)); - test_serialization(&2_u128.pow(32), Some(7)); - test_serialization(&(2_u128.pow(48) - 1), Some(7)); - test_serialization(&2_u128.pow(48), Some(9)); - test_serialization(&(2_u128.pow(64) - 1), Some(9)); - test_serialization(&2_u128.pow(64), Some(17)); - - test_serialization(&0_i128, Some(2)); - test_serialization(&(2_i128.pow(7) - 1), Some(2)); - test_serialization(&2_i128.pow(7), Some(3)); - test_serialization(&(2_i128.pow(15) - 1), Some(3)); - test_serialization(&2_i128.pow(15), Some(4)); - test_serialization(&(2_i128.pow(23) - 1), Some(4)); - test_serialization(&2_i128.pow(23), Some(5)); - test_serialization(&(2_i128.pow(31) - 1), Some(5)); - test_serialization(&2_i128.pow(31), Some(7)); - test_serialization(&(2_i128.pow(47) - 1), Some(7)); - test_serialization(&2_i128.pow(47), Some(9)); - test_serialization(&-(2_i128.pow(7)), Some(2)); - test_serialization(&-(2_i128.pow(7) + 1), Some(3)); - test_serialization(&-(2_i128.pow(15)), Some(3)); - test_serialization(&-(2_i128.pow(15) + 1), Some(4)); - test_serialization(&-(2_i128.pow(23)), Some(4)); - test_serialization(&-(2_i128.pow(23) + 1), Some(5)); - test_serialization(&-(2_i128.pow(31)), Some(5)); - test_serialization(&-(2_i128.pow(31) + 1), Some(7)); - test_serialization(&-(2_i128.pow(47)), Some(7)); - test_serialization(&-(2_i128.pow(47) + 1), Some(9)); - test_serialization(&-(2_i128.pow(63)), Some(9)); - test_serialization(&-(2_i128.pow(63) + 1), Some(17)); - - // Float packing relies on bitwise conversions and is lossless. 
- test_serialization(&f64::INFINITY, Some(3)); - test_serialization(&f64::NEG_INFINITY, Some(3)); - test_serialization(&0_f64, Some(3)); - test_serialization(&-0_f64, Some(3)); - test_serialization(&0.1_f64, Some(9)); - test_serialization(&0.1_f32, Some(5)); - } - - #[test] - fn tuples() { - test_serialization(&(1, true, 3), None); - } - - #[test] - fn enums() { - test_serialization(&EnumVariants::Unit, None); - - test_serialization(&EnumVariants::Tuple(0), None); - - test_serialization(&EnumVariants::TupleTwoArgs(1, 2), None); - - test_serialization(&EnumVariants::Struct { arg: 3 }, None); - - test_serialization(&Some(EnumVariants::Unit), None); - } - - #[test] - fn vectors() { - test_serialization(&vec![0_u64, 1], None); - test_serialization( - &vec![NumbersStruct::default(), NumbersStruct::default()], - None, - ); - } - - #[test] - fn option() { - test_serialization(&Option::::None, None); - test_serialization(&Some(0_u64), None); - test_serialization(&Some(u64::MAX), None); - } - - #[test] - fn phantom() { - test_serialization(&PhantomData::, None); - } - - #[derive(Serialize, PartialEq, Deserialize, Debug, Default)] - struct StringsAndBytes<'a> { - bytes: Cow<'a, [u8]>, - #[serde(with = "serde_bytes")] - bytes_borrowed: Cow<'a, [u8]>, - #[serde(with = "serde_bytes")] - serde_bytes_byte_slice: &'a [u8], - #[serde(with = "serde_bytes")] - serde_bytes_byte_vec: Vec, - str_ref: &'a str, - string: String, - } - - #[test] - fn borrowing_data() { - let original = StringsAndBytes { - bytes: Cow::Borrowed(b"hello"), - bytes_borrowed: Cow::Borrowed(b"hello"), - serde_bytes_byte_slice: b"hello", - serde_bytes_byte_vec: b"world".to_vec(), - str_ref: "hello", - string: String::from("world"), - }; - let serialized = to_vec(&original).unwrap(); - let deserialized = from_slice(&serialized).unwrap(); - assert_eq!(original, deserialized); - assert!(matches!(deserialized.bytes_borrowed, Cow::Borrowed(_))); - } - - #[test] - fn limiting_input() { - let original = StringsAndBytes 
{ - bytes: Cow::Borrowed(b"hello"), - bytes_borrowed: Cow::Borrowed(b"hello"), - serde_bytes_byte_slice: b"hello", - serde_bytes_byte_vec: b"world".to_vec(), - str_ref: "hello", - string: String::from("world"), - }; - let serialized = to_vec(&original).unwrap(); - // There are 6 values that contain 5 bytes each. A limit of 30 should be perfect. - assert!(Config::default() - .allocation_budget(30) - .deserialize::>(&serialized) - .is_ok()); - assert!(Config::default() - .allocation_budget(29) - .deserialize::>(&serialized) - .is_err()); - - // Test number limits. - let serialized = to_vec(&NumbersStruct { - u8: u8::MAX, - u16: u16::MAX, - char: char::MAX, - u32: u32::MAX, - u64: u64::MAX, - u128: u128::MAX, - i8: i8::MIN, - i16: i16::MIN, - i32: i32::MIN, - i64: i64::MIN, - i128: i128::MIN, - f32: f32::MAX, - f64: f64::MIN, - }) - .unwrap(); - assert!(Config::default() - .allocation_budget(78) - .deserialize::(&serialized) - .is_ok()); - assert!(Config::default() - .allocation_budget(77) - .deserialize::(&serialized) - .is_err()); - } - - #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)] - struct TupleStruct(u32, u8); - - #[test] - fn tuple_struct() { - test_serialization(&TupleStruct(1, 2), None); - } - - #[test] - fn value() { - macro_rules! 
roundtrip { - ($value:expr) => {{ - assert_eq!( - from_slice::>(&to_vec(&$value).unwrap()).unwrap(), - $value - ); - }}; - } - - roundtrip!(Value::None); - roundtrip!(Value::Unit); - roundtrip!(Value::Bool(true)); - roundtrip!(Value::Bool(false)); - roundtrip!(Value::Integer(Integer::from(i8::MAX))); - roundtrip!(Value::Integer(Integer::from(i16::MAX))); - roundtrip!(Value::Integer(Integer::from(i32::MAX))); - roundtrip!(Value::Integer(Integer::from(i64::MAX))); - roundtrip!(Value::Integer(Integer::from(i128::MAX))); - roundtrip!(Value::Integer(Integer::from(u8::MAX))); - roundtrip!(Value::Integer(Integer::from(u16::MAX))); - roundtrip!(Value::Integer(Integer::from(u32::MAX))); - roundtrip!(Value::Integer(Integer::from(u64::MAX))); - roundtrip!(Value::Integer(Integer::from(u128::MAX))); - roundtrip!(Value::Float(Float::from(std::f64::consts::PI))); - roundtrip!(Value::Float(Float::from(std::f32::consts::PI))); - roundtrip!(Value::Sequence(vec![Value::None])); - roundtrip!(Value::Mappings(vec![(Value::None, Value::Unit)])); - - let original_value = Value::Bytes(Cow::Borrowed(b"hello")); - let encoded_bytes = to_vec(&original_value).unwrap(); - let borrowed_decoded: Value<'_> = from_slice(&encoded_bytes).unwrap(); - assert_eq!(Value::String(Cow::Borrowed("hello")), borrowed_decoded); - assert!(matches!(borrowed_decoded, Value::String(Cow::Borrowed(_)))); - - let original_value = Value::Bytes(Cow::Borrowed(b"\xFE\xED\xD0\xD0")); - let encoded_bytes = to_vec(&original_value).unwrap(); - let borrowed_decoded: Value<'_> = from_slice(&encoded_bytes).unwrap(); - assert_eq!( - Value::Bytes(Cow::Borrowed(b"\xFE\xED\xD0\xD0")), - borrowed_decoded - ); - assert!(matches!(borrowed_decoded, Value::Bytes(Cow::Borrowed(_)))); - } - - #[test] - fn incompatible_version() { - let mut incompatible_header = Vec::new(); - format::write_header(&mut incompatible_header, CURRENT_VERSION + 1).unwrap(); - assert!(matches!( - from_slice::<()>(&incompatible_header), - 
Err(Error::IncompatibleVersion) - )); - } - - #[test] - fn invalid_char_cast() { - let bytes = to_vec(&0x11_0000_u32).unwrap(); - - assert!(matches!( - from_slice::(&bytes), - Err(Error::InvalidUtf8(_)) - )); - } - - #[test] - fn bytes_to_identifier() { - let mut valid_bytes = Vec::new(); - format::write_header(&mut valid_bytes, CURRENT_VERSION).unwrap(); - format::write_named(&mut valid_bytes).unwrap(); - format::write_bytes(&mut valid_bytes, b"Unit").unwrap(); - - assert_eq!( - from_slice::(&valid_bytes).unwrap(), - EnumVariants::Unit - ); - - let mut invalid_bytes = Vec::new(); - format::write_header(&mut invalid_bytes, CURRENT_VERSION).unwrap(); - format::write_named(&mut invalid_bytes).unwrap(); - format::write_bytes(&mut invalid_bytes, &0xFFFF_FFFF_u32.to_be_bytes()).unwrap(); - - assert!(matches!( - from_slice::(&invalid_bytes), - Err(Error::InvalidUtf8(_)) - )); - } - - #[test] - fn invalid_symbol() { - let mut valid_bytes = Vec::new(); - format::write_header(&mut valid_bytes, CURRENT_VERSION).unwrap(); - format::write_atom_header(&mut valid_bytes, format::Kind::Symbol, 4).unwrap(); - format::write_bytes(&mut valid_bytes, &0xFFFF_FFFF_u32.to_be_bytes()).unwrap(); - - assert!(matches!( - from_slice::>(&valid_bytes), - Err(Error::InvalidUtf8(_)) - )); - } - - #[test] - fn unknown_special() { - let mut invalid_bytes = Vec::new(); - format::write_header(&mut invalid_bytes, CURRENT_VERSION).unwrap(); - format::write_atom_header( - &mut invalid_bytes, - format::Kind::Special, - format::SPECIAL_COUNT, - ) - .unwrap(); - - assert!(from_slice::<()>(&invalid_bytes).is_err()); - } - - /// In `BonsaiDb`, sometimes it's nice to use a `()` as an associated type - /// as a default. To allow changing data that was previously serialized as a - /// `()` but now has a new type, Pot allows converting between unit types - /// and defaults of all major serialized types. 
The net effect is if you - /// start with a derived `BonsaiDb` view with no `value =` argument, `()` is - /// used instead. With this flexibility, changing the value type to another - /// type will sometimes be able to work without requiring rebuilding the - /// views on deployment. - #[test] - #[allow(clippy::cognitive_complexity)] - fn unit_adaptations() { - #[derive(Deserialize)] - struct Test { - #[serde(default)] - value: u32, - } - - let unit = to_vec(&()).unwrap(); - assert!(from_slice::>(&unit).unwrap().is_some()); - assert_eq!(from_slice::(&unit).unwrap().value, 0); - assert_eq!(from_slice::<&[u8]>(&unit).unwrap(), b""); - assert_eq!(from_slice::(&unit).unwrap(), b""); - assert_eq!(from_slice::<&str>(&unit).unwrap(), ""); - assert_eq!(from_slice::(&unit).unwrap(), 0); - assert_eq!(from_slice::(&unit).unwrap(), 0); - assert_eq!(from_slice::(&unit).unwrap(), 0); - assert_eq!(from_slice::(&unit).unwrap(), 0); - assert_eq!(from_slice::(&unit).unwrap(), 0); - assert_eq!(from_slice::(&unit).unwrap(), 0); - assert_eq!(from_slice::(&unit).unwrap(), 0); - assert_eq!(from_slice::(&unit).unwrap(), 0); - assert_eq!(from_slice::(&unit).unwrap(), 0); - assert_eq!(from_slice::(&unit).unwrap(), 0); - assert!(!from_slice::(&unit).unwrap()); - - let none = to_vec(&Option::<()>::None).unwrap(); - assert!(from_slice::>(&none).unwrap().is_none()); - assert!(from_slice::>(&none).unwrap().is_none()); - assert_eq!(from_slice::<&[u8]>(&none).unwrap(), b""); - assert_eq!(from_slice::(&none).unwrap(), b""); - assert_eq!(from_slice::<&str>(&none).unwrap(), ""); - assert_eq!(from_slice::(&none).unwrap(), 0); - assert_eq!(from_slice::(&none).unwrap(), 0); - assert_eq!(from_slice::(&none).unwrap(), 0); - assert_eq!(from_slice::(&none).unwrap(), 0); - assert_eq!(from_slice::(&none).unwrap(), 0); - assert_eq!(from_slice::(&none).unwrap(), 0); - assert_eq!(from_slice::(&none).unwrap(), 0); - assert_eq!(from_slice::(&none).unwrap(), 0); - assert_eq!(from_slice::(&none).unwrap(), 0); - 
assert_eq!(from_slice::(&none).unwrap(), 0); - assert!(!from_slice::(&none).unwrap()); - } - - #[test] - fn invalid_numbers() { - let mut invalid_float_byte_len = Vec::new(); - format::write_header(&mut invalid_float_byte_len, CURRENT_VERSION).unwrap(); - format::write_atom_header(&mut invalid_float_byte_len, format::Kind::Float, 0).unwrap(); - - assert!(from_slice::(&invalid_float_byte_len).is_err()); - - assert!(format::Float::read_from( - format::Kind::Symbol, - 0, - &mut &invalid_float_byte_len[..] - ) - .is_err(),); - - let mut invalid_signed_byte_len = Vec::new(); - format::write_header(&mut invalid_signed_byte_len, CURRENT_VERSION).unwrap(); - format::write_atom_header(&mut invalid_signed_byte_len, format::Kind::Int, 10).unwrap(); - - assert!(from_slice::(&invalid_signed_byte_len).is_err()); - - assert!(format::Integer::read_from( - format::Kind::Symbol, - 0, - &mut &invalid_signed_byte_len[..] - ) - .is_err(),); - - let mut invalid_unsigned_byte_len = Vec::new(); - format::write_header(&mut invalid_unsigned_byte_len, CURRENT_VERSION).unwrap(); - format::write_atom_header(&mut invalid_unsigned_byte_len, format::Kind::UInt, 10).unwrap(); - - assert!(from_slice::(&invalid_unsigned_byte_len).is_err()); - } - - #[test] - #[allow(clippy::unnecessary_mut_passed)] // It's necessary. 
- fn not_human_readable() { - let mut bytes = Vec::new(); - let mut serializer = ser::Serializer::new(&mut bytes).unwrap(); - assert!(!(&mut serializer).is_human_readable()); - ().serialize(&mut serializer).unwrap(); - - let bytes = to_vec(&()).unwrap(); - let mut deserializer = de::Deserializer::from_slice(&bytes, usize::MAX).unwrap(); - assert!(!(&mut deserializer).is_human_readable()); - } - - #[test] - fn unexpected_eof() { - let mut invalid_bytes = Vec::new(); - format::write_header(&mut invalid_bytes, CURRENT_VERSION).unwrap(); - format::write_atom_header(&mut invalid_bytes, format::Kind::Bytes, 10).unwrap(); - assert!(matches!( - from_slice::>(&invalid_bytes), - Err(Error::Eof) - )); - } - - #[test] - fn too_big_read() { - let mut invalid_bytes = Vec::new(); - format::write_header(&mut invalid_bytes, CURRENT_VERSION).unwrap(); - format::write_atom_header(&mut invalid_bytes, format::Kind::Bytes, 10).unwrap(); - assert!(matches!( - Config::default() - .allocation_budget(9) - .deserialize::>(&invalid_bytes), - Err(Error::TooManyBytesRead) - )); - } - - #[derive(Serialize, Deserialize, Debug, PartialEq)] - struct Flatten { - #[serde(flatten)] - structure: Flattened, - #[serde(flatten)] - enumeration: EnumVariants, - } - - #[derive(Serialize, Deserialize, Debug, PartialEq)] - struct Flattened { - field: String, - } - - #[test] - fn test_flatten() { - test_serialization( - &Flatten { - structure: Flattened { - field: String::from("flat"), - }, - enumeration: EnumVariants::Struct { arg: 1 }, - }, - None, - ); - } - - #[test] - fn direct_value_serialization() { - fn roundtrip Deserialize<'de> + PartialEq + Debug>(value: &T) { - let as_value = Value::from_serialize(value).unwrap(); - let deserialized = as_value.deserialize_as::().unwrap(); - assert_eq!(&deserialized, value); - } - - roundtrip(&NumbersStruct { - u8: u8::MAX, - u16: u16::MAX, - char: char::MAX, - u32: u32::MAX, - u64: u64::MAX, - u128: u128::MAX, - i8: i8::MIN, - i16: i16::MIN, - i32: i32::MIN, - i64: 
i64::MIN, - i128: i128::MIN, - f32: f32::MAX, - f64: f64::MIN, - }); - - roundtrip(&EnumVariants::Struct { arg: 1 }); - roundtrip(&EnumVariants::Tuple(1)); - roundtrip(&EnumVariants::TupleTwoArgs(1, 2)); - roundtrip(&EnumVariants::Unit); - roundtrip(&Some(1_u32)); - roundtrip(&"hello".to_string()); - roundtrip(&b"hello".to_vec()); - } - - #[test] - fn borrowed_value_serialization() { - #[track_caller] - fn check(value: &T) - where - T: Serialize + Debug, - U: Debug + PartialEq + for<'de> Deserialize<'de>, - { - let as_value = Value::from_serialize(value).unwrap(); - let deserialized = as_value.deserialize_as::().unwrap(); - assert_eq!(&deserialized, value); - } - - check::<_, Vec>(&b"hello"); - check::<_, String>(&"hello"); - } - - #[test] - fn value_error() { - #[derive(Debug)] - struct Fallible; - - impl Serialize for Fallible { - fn serialize(&self, _serializer: S) -> Result - where - S: Serializer, - { - Err(serde::ser::Error::custom("oh no!")) - } - } - - assert_eq!( - Value::from_serialize(Fallible), - Err(ValueError::Custom(String::from("oh no!"))) - ); - } - - #[test] - fn persistent_symbols_slice() { - let mut sender = ser::SymbolMap::default(); - let mut receiver = de::SymbolList::default(); - - let mut bytes = sender.serialize_to_vec(&NumbersStruct::default()).unwrap(); - let _result = receiver.deserialize_slice::(&bytes).unwrap(); - let symbol_count_after_first_send = receiver.len(); - let first_payload_len = bytes.len(); - - // Send again, confirm the symbol list didn't grow. 
- bytes.clear(); - sender - .serialize_to(&mut bytes, &NumbersStruct::default()) - .unwrap(); - let _result = receiver.deserialize_slice::(&bytes).unwrap(); - assert_eq!(symbol_count_after_first_send, receiver.len()); - println!( - "First: {first_payload_len} bytes; Second: {} bytes", - bytes.len() - ); - assert!(first_payload_len > bytes.len()); - } - - #[test] - fn persistent_symbols_read() { - let mut sender = ser::SymbolMap::default(); - let mut receiver = de::SymbolList::default(); - - let mut bytes = sender.serialize_to_vec(&NumbersStruct::default()).unwrap(); - let _result = receiver - .deserialize_from::(&bytes[..]) - .unwrap(); - let symbol_count_after_first_send = receiver.len(); - let first_payload_len = bytes.len(); - - // Send again, confirm the symbol list didn't grow. - bytes.clear(); - sender - .serialize_to(&mut bytes, &NumbersStruct::default()) - .unwrap(); - let _result = receiver - .deserialize_from::(&bytes[..]) - .unwrap(); - assert_eq!(symbol_count_after_first_send, receiver.len()); - println!( - "First: {first_payload_len} bytes; Second: {} bytes", - bytes.len() - ); - assert!(first_payload_len > bytes.len()); - } - - #[test] - fn symbol_map_serialization() { - #[derive(Serialize, Deserialize, Default, Eq, PartialEq, Debug)] - struct Payload { - a: usize, - b: usize, - } - - let mut sender = crate::ser::SymbolMap::default(); - assert!(sender.is_empty()); - let mut receiver = crate::de::SymbolMap::new(); - assert!(receiver.is_empty()); - - // Send the first payload, populating the map. - let mut bytes = sender.serialize_to_vec(&Payload::default()).unwrap(); - assert_eq!(sender.len(), 2); - - assert_eq!( - receiver.deserialize_slice::(&bytes).unwrap(), - Payload::default() - ); - assert_eq!(receiver.len(), 2); - - // Serialize the maps. 
- let serialized_sender = crate::to_vec(&sender).unwrap(); - let serialized_receiver = crate::to_vec(&receiver).unwrap(); - // The serialization formats are the same despite using different - // in-memory representations. This allows pre-serializing a dictionary - // before starting the intial payload. - assert_eq!(serialized_sender, serialized_receiver); - let mut deserialized_sender = - crate::from_slice::(&serialized_sender).unwrap(); - let mut deserialized_receiver = - crate::from_slice::(&serialized_receiver).unwrap(); - - // Create a new payload and serialize it. Ensure the payloads produced - // by the serialized map and the original map are identical. - let new_payload = Payload { a: 1, b: 2 }; - bytes.clear(); - sender.serialize_to(&mut bytes, &new_payload).unwrap(); - let from_serialized_sender = deserialized_sender.serialize_to_vec(&new_payload).unwrap(); - assert_eq!(bytes, from_serialized_sender); - - // Deserialize the payload - assert_eq!( - receiver.deserialize_slice::(&bytes).unwrap(), - new_payload - ); - assert_eq!( - deserialized_receiver - .deserialize_slice::(&bytes) - .unwrap(), - new_payload - ); - } +/// Compatibility settings for Pot. +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +#[non_exhaustive] +pub enum Compatibility { + /// Serializes data that is compatible with all versions of Pot + /// deserializers. + /// + /// This format does not support [`Value`](crate::Value) deserialization of + /// enum variants without associated data. See [`V4`](Self::V4) for more + /// information. + Full, + /// Serializes data in the default format + /// + /// This format has a single change in how enum variants without associated + /// data are serialized. This change allows `deserialize_any` to + /// unambiguously distinguish between variants with associated data and + /// variants without associated data. + /// + /// This will be the default compatibility setting in `v4.0` and later. 
All + /// versions after `v3.0.1` are able to read this updated format. + V4, +} - #[test] - fn symbol_map_population() { - let mut map = crate::ser::SymbolMap::default(); - map.populate_from(&NumbersStruct::default()).unwrap(); - map.populate_from(&EnumVariants::Struct { arg: 1 }).unwrap(); - map.populate_from(&EnumVariants::Tuple(0)).unwrap(); - map.populate_from(&EnumVariants::TupleTwoArgs(0, 1)) - .unwrap(); - assert_eq!(map.populate_from(&EnumVariants::Unit).unwrap(), 1); - assert_eq!(map.populate_from(&EnumVariants::Unit).unwrap(), 0); - dbg!(map); +impl Compatibility { + const fn const_default() -> Self { + Self::Full } +} - #[test] - fn backwards_compatible() { - #[derive(Debug, Serialize, Deserialize, Eq, PartialEq)] - struct Canary { - name: String, - id: u64, - } - - let canary = Canary { - name: String::from("coalmine"), - id: 0xfeed_d0d0_dead_beef, - }; - - // This payload was generated with pot 1.0 using the same structure. - // This structure should be updated to be more encompassing, but this at - // least tests for basic compatibility. - let v1_canary = [ - 80, 111, 116, 0, 162, 200, 110, 97, 109, 101, 232, 99, 111, 97, 108, 109, 105, 110, - 101, 196, 105, 100, 71, 239, 190, 173, 222, 208, 208, 237, 254, - ]; - let parsed: Canary = crate::from_slice(&v1_canary).unwrap(); - assert_eq!(canary, parsed); +impl Default for Compatibility { + fn default() -> Self { + Self::const_default() } } + +#[cfg(test)] +mod tests; diff --git a/pot/src/ser.rs b/pot/src/ser.rs index c0138a56..a12e1771 100644 --- a/pot/src/ser.rs +++ b/pot/src/ser.rs @@ -11,12 +11,13 @@ use serde::{ser, Deserialize, Serialize}; #[cfg(feature = "tracing")] use tracing::instrument; -use crate::format::{self, Kind, Special, CURRENT_VERSION}; -use crate::{Error, Result}; +use crate::format::{self, Kind, Special, INITIAL_VERSION, V4_VERSION}; +use crate::{Compatibility, Error, Result}; /// A Pot serializer. 
pub struct Serializer<'a, W: WriteBytesExt> { symbol_map: SymbolMapRef<'a>, + compatibility: Compatibility, output: W, bytes_written: usize, } @@ -34,15 +35,33 @@ impl<'a, W: WriteBytesExt> Serializer<'a, W> { /// Returns a new serializer outputting written bytes into `output`. #[inline] pub fn new(output: W) -> Result { + Self::new_with_compatibility(output, Compatibility::default()) + } + + /// Returns a new serializer outputting written bytes into `output`. + #[inline] + pub fn new_with_compatibility(output: W, compatibility: Compatibility) -> Result { Self::new_with_symbol_map( output, SymbolMapRef::Ephemeral(EphemeralSymbolMap::default()), + compatibility, ) } - fn new_with_symbol_map(mut output: W, symbol_map: SymbolMapRef<'a>) -> Result { - let bytes_written = format::write_header(&mut output, CURRENT_VERSION)?; + fn new_with_symbol_map( + mut output: W, + symbol_map: SymbolMapRef<'a>, + compatibility: Compatibility, + ) -> Result { + let bytes_written = format::write_header( + &mut output, + match compatibility { + Compatibility::Full => INITIAL_VERSION, + Compatibility::V4 => V4_VERSION, + }, + )?; Ok(Self { + compatibility, symbol_map, output, bytes_written, @@ -235,7 +254,9 @@ impl<'de, 'a: 'de, W: WriteBytesExt + 'a> ser::Serializer for &'de mut Serialize _variant_index: u32, variant: &'static str, ) -> Result<()> { - format::write_named(&mut self.output)?; + if matches!(self.compatibility, Compatibility::Full) { + self.bytes_written += format::write_named(&mut self.output)?; + } self.write_symbol(variant)?; Ok(()) } @@ -551,6 +572,7 @@ pub struct SymbolMap { symbols: String, entries: Vec<(Range, u32)>, static_lookup: Vec<(usize, u32)>, + compatibility: Compatibility, } impl Debug for SymbolMap { @@ -578,14 +600,27 @@ impl SymbolMap { symbols: String::new(), entries: Vec::new(), static_lookup: Vec::new(), + compatibility: Compatibility::const_default(), } } + /// Sets the compatibility mode for serializing and returns self. 
+ pub const fn with_compatibility(mut self, compatibility: Compatibility) -> Self { + self.compatibility = compatibility; + self + } + + /// Sets the compatibility mode for serializing. + pub fn set_compatibility(&mut self, compatibility: Compatibility) { + self.compatibility = compatibility; + } + /// Returns a serializer that writes into `output` and persists symbols /// into `self`. #[inline] pub fn serializer_for(&mut self, output: W) -> Result> { - Serializer::new_with_symbol_map(output, SymbolMapRef::Persistent(self)) + let compatibility = self.compatibility; + Serializer::new_with_symbol_map(output, SymbolMapRef::Persistent(self), compatibility) } /// Serializes `value` into `writer` while persisting symbols into `self`. diff --git a/pot/src/tests.rs b/pot/src/tests.rs new file mode 100644 index 00000000..b97ca4d5 --- /dev/null +++ b/pot/src/tests.rs @@ -0,0 +1,766 @@ +use serde_derive::Deserialize; +use serde_derive::Serialize; + +use std::borrow::Cow; +use std::marker::PhantomData; +use std::sync::OnceLock; + +use serde::{Deserializer, Serializer}; + +use super::*; +use crate::format::{Float, Integer, CURRENT_VERSION}; +use crate::value::Value; + +fn init_tracing() { + static INITIALIZED: OnceLock<()> = OnceLock::new(); + + INITIALIZED.get_or_init(|| { + #[cfg(not(feature = "tracing"))] + println!("To see additional logs, run tests with the `tracing` feature enabled"); + + tracing_subscriber::fmt() + .pretty() + // Enable everything. + .with_max_level(tracing::Level::TRACE) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ENTER) + // Set this to be the default, global collector for this application. 
+ .init(); + }); +} + +fn test_serialization Deserialize<'de> + PartialEq + Debug>( + value: &S, + check_length: Option, +) { + test_serialization_with(value, check_length, |value, deserialized| { + assert_eq!(value, deserialized); + }); +} + +fn test_serialization_with< + S: Serialize + for<'de> Deserialize<'de> + PartialEq + Debug, + F: FnMut(&S, &S), +>( + value: &S, + check_length: Option, + mut callback: F, +) { + init_tracing(); + let bytes = to_vec(&value).unwrap(); + println!("{value:?}: {bytes:02x?}"); + let deserialized = from_slice::(&bytes).unwrap(); + callback(value, &deserialized); + if let Some(check_length) = check_length { + // Subtract 4 bytes from the serialized output to account for the header. + assert_eq!(bytes.len() - 4, check_length); + } + + // Do the same, but using the reader interface. + let mut bytes = Vec::new(); + to_writer(value, &mut bytes).unwrap(); + println!("{value:?}: {bytes:02x?}"); + let deserialized = from_reader(&bytes[..]).unwrap(); + callback(value, &deserialized); +} + +use std::fmt::Debug; + +#[derive(Serialize, PartialEq, Deserialize, Debug, Default)] +struct NumbersStruct { + u8: u8, + u16: u16, + char: char, + u32: u32, + u64: u64, + u128: u128, + i8: i8, + i16: i16, + i32: i32, + i64: i64, + i128: i128, + f32: f32, + f64: f64, +} + +#[derive(Serialize, PartialEq, Deserialize, Debug)] +enum EnumVariants { + Unit, + Tuple(u64), + TupleTwoArgs(u64, u64), + Struct { arg: u64 }, +} + +#[test] +fn numbers() { + test_serialization(&NumbersStruct::default(), None); + test_serialization( + &NumbersStruct { + u8: u8::MAX, + u16: u16::MAX, + char: char::MAX, + u32: u32::MAX, + u64: u64::MAX, + u128: u128::MAX, + i8: i8::MIN, + i16: i16::MIN, + i32: i32::MIN, + i64: i64::MIN, + i128: i128::MIN, + f32: 1., + f64: 1., + }, + None, + ); +} + +#[test] +fn number_packing() { + test_serialization(&0_u128, Some(2)); + test_serialization(&(2_u128.pow(8) - 1), Some(2)); + test_serialization(&2_u128.pow(8), Some(3)); + 
test_serialization(&(2_u128.pow(16) - 1), Some(3)); + test_serialization(&2_u128.pow(16), Some(4)); + test_serialization(&(2_u128.pow(24) - 1), Some(4)); + test_serialization(&2_u128.pow(24), Some(5)); + test_serialization(&(2_u128.pow(32) - 1), Some(5)); + test_serialization(&2_u128.pow(32), Some(7)); + test_serialization(&(2_u128.pow(48) - 1), Some(7)); + test_serialization(&2_u128.pow(48), Some(9)); + test_serialization(&(2_u128.pow(64) - 1), Some(9)); + test_serialization(&2_u128.pow(64), Some(17)); + + test_serialization(&0_i128, Some(2)); + test_serialization(&(2_i128.pow(7) - 1), Some(2)); + test_serialization(&2_i128.pow(7), Some(3)); + test_serialization(&(2_i128.pow(15) - 1), Some(3)); + test_serialization(&2_i128.pow(15), Some(4)); + test_serialization(&(2_i128.pow(23) - 1), Some(4)); + test_serialization(&2_i128.pow(23), Some(5)); + test_serialization(&(2_i128.pow(31) - 1), Some(5)); + test_serialization(&2_i128.pow(31), Some(7)); + test_serialization(&(2_i128.pow(47) - 1), Some(7)); + test_serialization(&2_i128.pow(47), Some(9)); + test_serialization(&-(2_i128.pow(7)), Some(2)); + test_serialization(&-(2_i128.pow(7) + 1), Some(3)); + test_serialization(&-(2_i128.pow(15)), Some(3)); + test_serialization(&-(2_i128.pow(15) + 1), Some(4)); + test_serialization(&-(2_i128.pow(23)), Some(4)); + test_serialization(&-(2_i128.pow(23) + 1), Some(5)); + test_serialization(&-(2_i128.pow(31)), Some(5)); + test_serialization(&-(2_i128.pow(31) + 1), Some(7)); + test_serialization(&-(2_i128.pow(47)), Some(7)); + test_serialization(&-(2_i128.pow(47) + 1), Some(9)); + test_serialization(&-(2_i128.pow(63)), Some(9)); + test_serialization(&-(2_i128.pow(63) + 1), Some(17)); + + // Float packing relies on bitwise conversions and is lossless. 
+ test_serialization(&f64::INFINITY, Some(3)); + test_serialization(&f64::NEG_INFINITY, Some(3)); + test_serialization(&0_f64, Some(3)); + test_serialization(&-0_f64, Some(3)); + test_serialization(&0.1_f64, Some(9)); + test_serialization(&0.1_f32, Some(5)); +} + +#[test] +fn tuples() { + test_serialization(&(1, true, 3), None); +} + +#[test] +fn enums() { + test_serialization(&EnumVariants::Unit, None); + + test_serialization(&EnumVariants::Tuple(0), None); + + test_serialization(&EnumVariants::TupleTwoArgs(1, 2), None); + + test_serialization(&EnumVariants::Struct { arg: 3 }, None); + + test_serialization(&Some(EnumVariants::Unit), None); +} + +#[test] +fn vectors() { + test_serialization(&vec![0_u64, 1], None); + test_serialization( + &vec![NumbersStruct::default(), NumbersStruct::default()], + None, + ); +} + +#[test] +fn option() { + test_serialization(&Option::::None, None); + test_serialization(&Some(0_u64), None); + test_serialization(&Some(u64::MAX), None); +} + +#[test] +fn phantom() { + test_serialization(&PhantomData::, None); +} + +#[derive(Serialize, PartialEq, Deserialize, Debug, Default)] +struct StringsAndBytes<'a> { + bytes: Cow<'a, [u8]>, + #[serde(with = "serde_bytes")] + bytes_borrowed: Cow<'a, [u8]>, + #[serde(with = "serde_bytes")] + serde_bytes_byte_slice: &'a [u8], + #[serde(with = "serde_bytes")] + serde_bytes_byte_vec: Vec, + str_ref: &'a str, + string: String, +} + +#[test] +fn borrowing_data() { + let original = StringsAndBytes { + bytes: Cow::Borrowed(b"hello"), + bytes_borrowed: Cow::Borrowed(b"hello"), + serde_bytes_byte_slice: b"hello", + serde_bytes_byte_vec: b"world".to_vec(), + str_ref: "hello", + string: String::from("world"), + }; + let serialized = to_vec(&original).unwrap(); + let deserialized = from_slice(&serialized).unwrap(); + assert_eq!(original, deserialized); + assert!(matches!(deserialized.bytes_borrowed, Cow::Borrowed(_))); +} + +#[test] +fn limiting_input() { + let original = StringsAndBytes { + bytes: 
Cow::Borrowed(b"hello"), + bytes_borrowed: Cow::Borrowed(b"hello"), + serde_bytes_byte_slice: b"hello", + serde_bytes_byte_vec: b"world".to_vec(), + str_ref: "hello", + string: String::from("world"), + }; + let serialized = to_vec(&original).unwrap(); + // There are 6 values that contain 5 bytes each. A limit of 30 should be perfect. + assert!(Config::default() + .allocation_budget(30) + .deserialize::>(&serialized) + .is_ok()); + assert!(Config::default() + .allocation_budget(29) + .deserialize::>(&serialized) + .is_err()); + + // Test number limits. + let serialized = to_vec(&NumbersStruct { + u8: u8::MAX, + u16: u16::MAX, + char: char::MAX, + u32: u32::MAX, + u64: u64::MAX, + u128: u128::MAX, + i8: i8::MIN, + i16: i16::MIN, + i32: i32::MIN, + i64: i64::MIN, + i128: i128::MIN, + f32: f32::MAX, + f64: f64::MIN, + }) + .unwrap(); + assert!(Config::default() + .allocation_budget(78) + .deserialize::(&serialized) + .is_ok()); + assert!(Config::default() + .allocation_budget(77) + .deserialize::(&serialized) + .is_err()); +} + +#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)] +struct TupleStruct(u32, u8); + +#[test] +fn tuple_struct() { + test_serialization(&TupleStruct(1, 2), None); +} + +#[test] +fn value() { + macro_rules! 
roundtrip { + ($value:expr) => {{ + assert_eq!( + from_slice::>(&to_vec(&$value).unwrap()).unwrap(), + $value + ); + }}; + } + + roundtrip!(Value::None); + roundtrip!(Value::Unit); + roundtrip!(Value::Bool(true)); + roundtrip!(Value::Bool(false)); + roundtrip!(Value::Integer(Integer::from(i8::MAX))); + roundtrip!(Value::Integer(Integer::from(i16::MAX))); + roundtrip!(Value::Integer(Integer::from(i32::MAX))); + roundtrip!(Value::Integer(Integer::from(i64::MAX))); + roundtrip!(Value::Integer(Integer::from(i128::MAX))); + roundtrip!(Value::Integer(Integer::from(u8::MAX))); + roundtrip!(Value::Integer(Integer::from(u16::MAX))); + roundtrip!(Value::Integer(Integer::from(u32::MAX))); + roundtrip!(Value::Integer(Integer::from(u64::MAX))); + roundtrip!(Value::Integer(Integer::from(u128::MAX))); + roundtrip!(Value::Float(Float::from(std::f64::consts::PI))); + roundtrip!(Value::Float(Float::from(std::f32::consts::PI))); + roundtrip!(Value::Sequence(vec![Value::None])); + roundtrip!(Value::Mappings(vec![(Value::None, Value::Unit)])); + + let original_value = Value::Bytes(Cow::Borrowed(b"hello")); + let encoded_bytes = to_vec(&original_value).unwrap(); + let borrowed_decoded: Value<'_> = from_slice(&encoded_bytes).unwrap(); + assert_eq!(Value::String(Cow::Borrowed("hello")), borrowed_decoded); + assert!(matches!(borrowed_decoded, Value::String(Cow::Borrowed(_)))); + + let original_value = Value::Bytes(Cow::Borrowed(b"\xFE\xED\xD0\xD0")); + let encoded_bytes = to_vec(&original_value).unwrap(); + let borrowed_decoded: Value<'_> = from_slice(&encoded_bytes).unwrap(); + assert_eq!( + Value::Bytes(Cow::Borrowed(b"\xFE\xED\xD0\xD0")), + borrowed_decoded + ); + assert!(matches!(borrowed_decoded, Value::Bytes(Cow::Borrowed(_)))); +} + +#[test] +fn incompatible_version() { + let mut incompatible_header = Vec::new(); + format::write_header(&mut incompatible_header, CURRENT_VERSION + 1).unwrap(); + assert!(matches!( + from_slice::<()>(&incompatible_header), + 
Err(Error::IncompatibleVersion) + )); +} + +#[test] +fn invalid_char_cast() { + let bytes = to_vec(&0x11_0000_u32).unwrap(); + + assert!(matches!( + from_slice::(&bytes), + Err(Error::InvalidUtf8(_)) + )); +} + +#[test] +fn bytes_to_identifier() { + let mut valid_bytes = Vec::new(); + format::write_header(&mut valid_bytes, CURRENT_VERSION).unwrap(); + format::write_named(&mut valid_bytes).unwrap(); + format::write_bytes(&mut valid_bytes, b"Unit").unwrap(); + + assert_eq!( + from_slice::(&valid_bytes).unwrap(), + EnumVariants::Unit + ); + + let mut invalid_bytes = Vec::new(); + format::write_header(&mut invalid_bytes, CURRENT_VERSION).unwrap(); + format::write_named(&mut invalid_bytes).unwrap(); + format::write_bytes(&mut invalid_bytes, &0xFFFF_FFFF_u32.to_be_bytes()).unwrap(); + + assert!(matches!( + from_slice::(&invalid_bytes), + Err(Error::InvalidUtf8(_)) + )); +} + +#[test] +fn invalid_symbol() { + let mut valid_bytes = Vec::new(); + format::write_header(&mut valid_bytes, CURRENT_VERSION).unwrap(); + format::write_atom_header(&mut valid_bytes, format::Kind::Symbol, 4).unwrap(); + format::write_bytes(&mut valid_bytes, &0xFFFF_FFFF_u32.to_be_bytes()).unwrap(); + + assert!(matches!( + from_slice::>(&valid_bytes), + Err(Error::InvalidUtf8(_)) + )); +} + +#[test] +fn unknown_special() { + let mut invalid_bytes = Vec::new(); + format::write_header(&mut invalid_bytes, CURRENT_VERSION).unwrap(); + format::write_atom_header( + &mut invalid_bytes, + format::Kind::Special, + format::SPECIAL_COUNT, + ) + .unwrap(); + + assert!(from_slice::<()>(&invalid_bytes).is_err()); +} + +/// In `BonsaiDb`, sometimes it's nice to use a `()` as an associated type +/// as a default. To allow changing data that was previously serialized as a +/// `()` but now has a new type, Pot allows converting between unit types +/// and defaults of all major serialized types. The net effect is if you +/// start with a derived `BonsaiDb` view with no `value =` argument, `()` is +/// used instead. 
With this flexibility, changing the value type to another +/// type will sometimes be able to work without requiring rebuilding the +/// views on deployment. +#[test] +#[allow(clippy::cognitive_complexity)] +fn unit_adaptations() { + #[derive(Deserialize)] + struct Test { + #[serde(default)] + value: u32, + } + + let unit = to_vec(&()).unwrap(); + assert!(from_slice::>(&unit).unwrap().is_some()); + assert_eq!(from_slice::(&unit).unwrap().value, 0); + assert_eq!(from_slice::<&[u8]>(&unit).unwrap(), b""); + assert_eq!(from_slice::(&unit).unwrap(), b""); + assert_eq!(from_slice::<&str>(&unit).unwrap(), ""); + assert_eq!(from_slice::(&unit).unwrap(), 0); + assert_eq!(from_slice::(&unit).unwrap(), 0); + assert_eq!(from_slice::(&unit).unwrap(), 0); + assert_eq!(from_slice::(&unit).unwrap(), 0); + assert_eq!(from_slice::(&unit).unwrap(), 0); + assert_eq!(from_slice::(&unit).unwrap(), 0); + assert_eq!(from_slice::(&unit).unwrap(), 0); + assert_eq!(from_slice::(&unit).unwrap(), 0); + assert_eq!(from_slice::(&unit).unwrap(), 0); + assert_eq!(from_slice::(&unit).unwrap(), 0); + assert!(!from_slice::(&unit).unwrap()); + + let none = to_vec(&Option::<()>::None).unwrap(); + assert!(from_slice::>(&none).unwrap().is_none()); + assert!(from_slice::>(&none).unwrap().is_none()); + assert_eq!(from_slice::<&[u8]>(&none).unwrap(), b""); + assert_eq!(from_slice::(&none).unwrap(), b""); + assert_eq!(from_slice::<&str>(&none).unwrap(), ""); + assert_eq!(from_slice::(&none).unwrap(), 0); + assert_eq!(from_slice::(&none).unwrap(), 0); + assert_eq!(from_slice::(&none).unwrap(), 0); + assert_eq!(from_slice::(&none).unwrap(), 0); + assert_eq!(from_slice::(&none).unwrap(), 0); + assert_eq!(from_slice::(&none).unwrap(), 0); + assert_eq!(from_slice::(&none).unwrap(), 0); + assert_eq!(from_slice::(&none).unwrap(), 0); + assert_eq!(from_slice::(&none).unwrap(), 0); + assert_eq!(from_slice::(&none).unwrap(), 0); + assert!(!from_slice::(&none).unwrap()); +} + +#[test] +fn invalid_numbers() { + let mut 
invalid_float_byte_len = Vec::new(); + format::write_header(&mut invalid_float_byte_len, CURRENT_VERSION).unwrap(); + format::write_atom_header(&mut invalid_float_byte_len, format::Kind::Float, 0).unwrap(); + + assert!(from_slice::(&invalid_float_byte_len).is_err()); + + assert!( + format::Float::read_from(format::Kind::Symbol, 0, &mut &invalid_float_byte_len[..]) + .is_err(), + ); + + let mut invalid_signed_byte_len = Vec::new(); + format::write_header(&mut invalid_signed_byte_len, CURRENT_VERSION).unwrap(); + format::write_atom_header(&mut invalid_signed_byte_len, format::Kind::Int, 10).unwrap(); + + assert!(from_slice::(&invalid_signed_byte_len).is_err()); + + assert!( + format::Integer::read_from(format::Kind::Symbol, 0, &mut &invalid_signed_byte_len[..]) + .is_err(), + ); + + let mut invalid_unsigned_byte_len = Vec::new(); + format::write_header(&mut invalid_unsigned_byte_len, CURRENT_VERSION).unwrap(); + format::write_atom_header(&mut invalid_unsigned_byte_len, format::Kind::UInt, 10).unwrap(); + + assert!(from_slice::(&invalid_unsigned_byte_len).is_err()); +} + +#[test] +#[allow(clippy::unnecessary_mut_passed)] // It's necessary. 
+fn not_human_readable() { + let mut bytes = Vec::new(); + let mut serializer = ser::Serializer::new(&mut bytes).unwrap(); + assert!(!(&mut serializer).is_human_readable()); + ().serialize(&mut serializer).unwrap(); + + let bytes = to_vec(&()).unwrap(); + let mut deserializer = de::Deserializer::from_slice(&bytes, usize::MAX).unwrap(); + assert!(!(&mut deserializer).is_human_readable()); +} + +#[test] +fn unexpected_eof() { + let mut invalid_bytes = Vec::new(); + format::write_header(&mut invalid_bytes, CURRENT_VERSION).unwrap(); + format::write_atom_header(&mut invalid_bytes, format::Kind::Bytes, 10).unwrap(); + assert!(matches!( + from_slice::>(&invalid_bytes), + Err(Error::Eof) + )); +} + +#[test] +fn too_big_read() { + let mut invalid_bytes = Vec::new(); + format::write_header(&mut invalid_bytes, CURRENT_VERSION).unwrap(); + format::write_atom_header(&mut invalid_bytes, format::Kind::Bytes, 10).unwrap(); + assert!(matches!( + Config::default() + .allocation_budget(9) + .deserialize::>(&invalid_bytes), + Err(Error::TooManyBytesRead) + )); +} + +#[derive(Serialize, Deserialize, Debug, PartialEq)] +struct Flatten { + #[serde(flatten)] + structure: Flattened, + #[serde(flatten)] + enumeration: EnumVariants, +} + +#[derive(Serialize, Deserialize, Debug, PartialEq)] +struct Flattened { + field: String, +} + +#[test] +fn test_flatten() { + test_serialization( + &Flatten { + structure: Flattened { + field: String::from("flat"), + }, + enumeration: EnumVariants::Struct { arg: 1 }, + }, + None, + ); +} + +#[test] +fn direct_value_serialization() { + fn roundtrip Deserialize<'de> + PartialEq + Debug>(value: &T) { + let as_value = Value::from_serialize(value).unwrap(); + let deserialized = as_value.deserialize_as::().unwrap(); + assert_eq!(&deserialized, value); + } + + roundtrip(&NumbersStruct { + u8: u8::MAX, + u16: u16::MAX, + char: char::MAX, + u32: u32::MAX, + u64: u64::MAX, + u128: u128::MAX, + i8: i8::MIN, + i16: i16::MIN, + i32: i32::MIN, + i64: i64::MIN, + i128: 
i128::MIN, + f32: f32::MAX, + f64: f64::MIN, + }); + + roundtrip(&EnumVariants::Struct { arg: 1 }); + roundtrip(&EnumVariants::Tuple(1)); + roundtrip(&EnumVariants::TupleTwoArgs(1, 2)); + roundtrip(&EnumVariants::Unit); + roundtrip(&Some(1_u32)); + roundtrip(&"hello".to_string()); + roundtrip(&b"hello".to_vec()); +} + +#[test] +fn borrowed_value_serialization() { + #[track_caller] + fn check(value: &T) + where + T: Serialize + Debug, + U: Debug + PartialEq + for<'de> Deserialize<'de>, + { + let as_value = Value::from_serialize(value).unwrap(); + let deserialized = as_value.deserialize_as::().unwrap(); + assert_eq!(&deserialized, value); + } + + check::<_, Vec>(&b"hello"); + check::<_, String>(&"hello"); +} + +#[test] +fn value_error() { + #[derive(Debug)] + struct Fallible; + + impl Serialize for Fallible { + fn serialize(&self, _serializer: S) -> Result + where + S: Serializer, + { + Err(serde::ser::Error::custom("oh no!")) + } + } + + assert_eq!( + Value::from_serialize(Fallible), + Err(ValueError::Custom(String::from("oh no!"))) + ); +} + +#[test] +fn persistent_symbols_slice() { + let mut sender = ser::SymbolMap::default(); + let mut receiver = de::SymbolList::default(); + + let mut bytes = sender.serialize_to_vec(&NumbersStruct::default()).unwrap(); + let _result = receiver.deserialize_slice::(&bytes).unwrap(); + let symbol_count_after_first_send = receiver.len(); + let first_payload_len = bytes.len(); + + // Send again, confirm the symbol list didn't grow. 
+ bytes.clear(); + sender + .serialize_to(&mut bytes, &NumbersStruct::default()) + .unwrap(); + let _result = receiver.deserialize_slice::(&bytes).unwrap(); + assert_eq!(symbol_count_after_first_send, receiver.len()); + println!( + "First: {first_payload_len} bytes; Second: {} bytes", + bytes.len() + ); + assert!(first_payload_len > bytes.len()); +} + +#[test] +fn persistent_symbols_read() { + let mut sender = ser::SymbolMap::default(); + let mut receiver = de::SymbolList::default(); + + let mut bytes = sender.serialize_to_vec(&NumbersStruct::default()).unwrap(); + let _result = receiver + .deserialize_from::(&bytes[..]) + .unwrap(); + let symbol_count_after_first_send = receiver.len(); + let first_payload_len = bytes.len(); + + // Send again, confirm the symbol list didn't grow. + bytes.clear(); + sender + .serialize_to(&mut bytes, &NumbersStruct::default()) + .unwrap(); + let _result = receiver + .deserialize_from::(&bytes[..]) + .unwrap(); + assert_eq!(symbol_count_after_first_send, receiver.len()); + println!( + "First: {first_payload_len} bytes; Second: {} bytes", + bytes.len() + ); + assert!(first_payload_len > bytes.len()); +} + +#[test] +fn symbol_map_serialization() { + #[derive(Serialize, Deserialize, Default, Eq, PartialEq, Debug)] + struct Payload { + a: usize, + b: usize, + } + + let mut sender = crate::ser::SymbolMap::default(); + assert!(sender.is_empty()); + let mut receiver = crate::de::SymbolMap::new(); + assert!(receiver.is_empty()); + + // Send the first payload, populating the map. + let mut bytes = sender.serialize_to_vec(&Payload::default()).unwrap(); + assert_eq!(sender.len(), 2); + + assert_eq!( + receiver.deserialize_slice::(&bytes).unwrap(), + Payload::default() + ); + assert_eq!(receiver.len(), 2); + + // Serialize the maps. 
+ let serialized_sender = crate::to_vec(&sender).unwrap(); + let serialized_receiver = crate::to_vec(&receiver).unwrap(); + // The serialization formats are the same despite using different + // in-memory representations. This allows pre-serializing a dictionary + // before starting the initial payload. + assert_eq!(serialized_sender, serialized_receiver); + let mut deserialized_sender = + crate::from_slice::(&serialized_sender).unwrap(); + let mut deserialized_receiver = + crate::from_slice::(&serialized_receiver).unwrap(); + + // Create a new payload and serialize it. Ensure the payloads produced + // by the serialized map and the original map are identical. + let new_payload = Payload { a: 1, b: 2 }; + bytes.clear(); + sender.serialize_to(&mut bytes, &new_payload).unwrap(); + let from_serialized_sender = deserialized_sender.serialize_to_vec(&new_payload).unwrap(); + assert_eq!(bytes, from_serialized_sender); + + // Deserialize the payload + assert_eq!( + receiver.deserialize_slice::(&bytes).unwrap(), + new_payload + ); + assert_eq!( + deserialized_receiver + .deserialize_slice::(&bytes) + .unwrap(), + new_payload + ); +} + +#[test] +fn symbol_map_population() { + let mut map = crate::ser::SymbolMap::default(); + map.populate_from(&NumbersStruct::default()).unwrap(); + map.populate_from(&EnumVariants::Struct { arg: 1 }).unwrap(); + map.populate_from(&EnumVariants::Tuple(0)).unwrap(); + map.populate_from(&EnumVariants::TupleTwoArgs(0, 1)) + .unwrap(); + assert_eq!(map.populate_from(&EnumVariants::Unit).unwrap(), 1); + assert_eq!(map.populate_from(&EnumVariants::Unit).unwrap(), 0); + dbg!(map); +} + +#[test] +fn backwards_compatible() { + #[derive(Debug, Serialize, Deserialize, Eq, PartialEq)] + struct Canary { + name: String, + id: u64, + } + + let canary = Canary { + name: String::from("coalmine"), + id: 0xfeed_d0d0_dead_beef, + }; + + // This payload was generated with pot 1.0 using the same structure. 
+ // This structure should be updated to be more encompassing, but this at + // least tests for basic compatibility. + let v1_canary = [ + 80, 111, 116, 0, 162, 200, 110, 97, 109, 101, 232, 99, 111, 97, 108, 109, 105, 110, 101, + 196, 105, 100, 71, 239, 190, 173, 222, 208, 208, 237, 254, + ]; + let parsed: Canary = crate::from_slice(&v1_canary).unwrap(); + assert_eq!(canary, parsed); +}