diff --git a/CHANGELOG.md b/CHANGELOG.md
index 15810157e..4e1bee615 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # Changelog

+## 0.7.5 (2023-12-21) - `utils/core` crate only
+* Added variable-length serialization and deserialization for `usize` type (#238).
+
 ## 0.7.4 (2023-12-18) - `air` crate only
 * Fixed a bug in `StarkProof` deserialization (#236).

diff --git a/utils/core/Cargo.toml b/utils/core/Cargo.toml
index 9e0bc8aaf..234d2d553 100644
--- a/utils/core/Cargo.toml
+++ b/utils/core/Cargo.toml
@@ -22,3 +22,6 @@ std = []

 [dependencies]
 rayon = { version = "1.8", optional = true }
+
+[dev-dependencies]
+proptest = "1.3"
diff --git a/utils/core/src/serde/byte_reader.rs b/utils/core/src/serde/byte_reader.rs
index f784793f8..aa1f156cf 100644
--- a/utils/core/src/serde/byte_reader.rs
+++ b/utils/core/src/serde/byte_reader.rs
@@ -92,6 +92,41 @@ pub trait ByteReader {
         Ok(u64::from_le_bytes(bytes))
     }

+    /// Returns a usize value read from `self` in [vint64](https://docs.rs/vint64/latest/vint64/)
+    /// format.
+    ///
+    /// # Errors
+    /// Returns a [DeserializationError] if:
+    /// * usize value could not be read from `self`.
+    /// * encoded value is greater than `usize` maximum value on a given platform.
+    fn read_usize(&mut self) -> Result<usize, DeserializationError> {
+        let first_byte = self.peek_u8()?;
+        let length = first_byte.trailing_zeros() as usize + 1;
+
+        let result = if length == 9 {
+            // 9-byte special case: skip the zero marker byte and read the full 8-byte value
+            self.read_u8()?;
+            let value = self.read_array::<8>()?;
+            u64::from_le_bytes(value)
+        } else {
+            // shift out the `length` low-order marker bits to recover the value
+            let mut encoded = [0u8; 8];
+            encoded[..length].copy_from_slice(self.read_slice(length)?);
+            u64::from_le_bytes(encoded) >> length
+        };
+
+        // check if the result value is within acceptable bounds for `usize` on a given platform
+        if result > usize::MAX as u64 {
+            return Err(DeserializationError::InvalidValue(format!(
+                "Encoded value must be less than or equal to {}, but {} was provided",
+                usize::MAX,
+                result
+            )));
+        }
+
+        Ok(result as usize)
+    }
+
     /// Returns a u128 value read from `self` in little-endian byte order.
     ///
     /// # Errors
diff --git a/utils/core/src/serde/byte_writer.rs b/utils/core/src/serde/byte_writer.rs
index eddc7c424..bd529a7a9 100644
--- a/utils/core/src/serde/byte_writer.rs
+++ b/utils/core/src/serde/byte_writer.rs
@@ -62,6 +62,24 @@ pub trait ByteWriter: Sized {
         self.write_bytes(&value.to_le_bytes());
     }

+    /// Writes a usize value in [vint64](https://docs.rs/vint64/latest/vint64/) format into `self`.
+    ///
+    /// # Panics
+    /// Panics if the value could not be written into `self`.
+    fn write_usize(&mut self, value: usize) {
+        let length = encoded_len(value);
+        // widen to u64 so that `to_le_bytes()` always yields 8 bytes, even on 32-bit targets
+        let value = value as u64;
+        if length == 9 {
+            // 9-byte special case: the length byte is zero, followed by the full 8-byte value
+            self.write_u8(0);
+            self.write(value.to_le_bytes());
+        } else {
+            let encoded_bytes = (((value << 1) | 1) << (length - 1)).to_le_bytes();
+            self.write_bytes(&encoded_bytes[..length]);
+        }
+    }
+
     /// Writes a serializable value into `self`.
///
     /// # Panics
@@ -83,3 +101,18 @@ impl ByteWriter for Vec<u8> {
         self.extend_from_slice(values);
     }
 }
+
+// HELPER FUNCTIONS
+// ================================================================================================
+
+/// Returns the number of bytes (from 1 to 9) required to encode `value` in vint64 format.
+///
+/// The value is widened to `u64` before counting leading zeros so that the result does not
+/// depend on the native width of `usize`.
+pub fn encoded_len(value: usize) -> usize {
+    // each of the first 8 encoded bytes carries 7 bits of payload; anything wider than 56 bits
+    // falls into the 9-byte special case
+    let zeros = (value as u64).leading_zeros() as usize;
+    let len = zeros.saturating_sub(1) / 7;
+    9 - core::cmp::min(len, 8)
+}
diff --git a/utils/core/src/serde/mod.rs b/utils/core/src/serde/mod.rs
index 6f265ffdd..6635da98d 100644
--- a/utils/core/src/serde/mod.rs
+++ b/utils/core/src/serde/mod.rs
@@ -161,6 +161,12 @@ impl Serializable for u64 {
     }
 }

+impl Serializable for usize {
+    fn write_into<W: ByteWriter>(&self, target: &mut W) {
+        target.write_usize(*self)
+    }
+}
+
 impl<T: Serializable> Serializable for Option<T> {
     fn write_into<W: ByteWriter>(&self, target: &mut W) {
         match self {
@@ -387,6 +393,12 @@ impl Deserializable for u64 {
     }
 }

+impl Deserializable for usize {
+    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
+        source.read_usize()
+    }
+}
+
 impl<T: Deserializable> Deserializable for Option<T> {
     fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
         let contains = source.read_bool()?;
diff --git a/utils/core/src/tests.rs b/utils/core/src/tests.rs
index f946a6c86..68d391d27 100644
--- a/utils/core/src/tests.rs
+++ b/utils/core/src/tests.rs
@@ -4,6 +4,7 @@
 // LICENSE file in the root directory of this source tree.
use super::{collections::Vec, ByteReader, ByteWriter, Serializable, SliceReader};
+use proptest::prelude::{any, proptest};

 // VECTOR UTILS TESTS
 // ================================================================================================
@@ -112,6 +113,30 @@ fn write_serializable() {
     assert_eq!(234567u128, reader.read_u128().unwrap());
 }

+#[test]
+fn write_serializable_usize() {
+    let mut target: Vec<u8> = Vec::new();
+
+    // the running length grows by 1, 1, 2, 3 and (on a 64-bit target) 9 bytes respectively
+    target.write(0usize);
+    assert_eq!(1, target.len());
+    target.write(1usize);
+    assert_eq!(2, target.len());
+    target.write(255usize);
+    assert_eq!(4, target.len());
+    target.write(234567usize);
+    assert_eq!(7, target.len());
+    target.write(usize::MAX);
+    assert_eq!(16, target.len());
+
+    let mut reader = SliceReader::new(&target);
+    assert_eq!(0usize, reader.read_usize().unwrap());
+    assert_eq!(1usize, reader.read_usize().unwrap());
+    assert_eq!(255usize, reader.read_usize().unwrap());
+    assert_eq!(234567usize, reader.read_usize().unwrap());
+    assert_eq!(usize::MAX, reader.read_usize().unwrap());
+}
+
 #[test]
 fn write_serializable_batch() {
     let mut target: Vec<u8> = Vec::new();
@@ -147,3 +172,17 @@ fn write_serializable_array_batch() {
         assert_eq!(i, reader.read_u128().unwrap());
     }
 }
+
+// UTILS - RANDOMIZED - UINT SERIALIZATION AND DESERIALIZATION
+// ================================================================================================
+proptest! {
+    #[test]
+    fn usize_proptest(a in any::<usize>()) {
+        // round-trip check: a value written with `write` must be read back unchanged
+        let mut target: Vec<u8> = Vec::new();
+        target.write(a);
+
+        let mut reader = SliceReader::new(&target);
+        assert_eq!(a, reader.read_usize().unwrap());
+    }
+}