From aad7b05cf23850ed18386e939249522b8b10d220 Mon Sep 17 00:00:00 2001
From: Paul Butler
Date: Thu, 2 May 2024 09:00:54 -0400
Subject: [PATCH] add serialized map

---
 serialized-map/.gitignore |   1 +
 serialized-map/Cargo.toml |   8 +++
 serialized-map/src/lib.rs | 132 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 141 insertions(+)
 create mode 100644 serialized-map/.gitignore
 create mode 100644 serialized-map/Cargo.toml
 create mode 100644 serialized-map/src/lib.rs

diff --git a/serialized-map/.gitignore b/serialized-map/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/serialized-map/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/serialized-map/Cargo.toml b/serialized-map/Cargo.toml
new file mode 100644
index 0000000..efcaa42
--- /dev/null
+++ b/serialized-map/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "serialized-map"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+brotli = "5.0.0"
+thiserror = "1.0.59"
diff --git a/serialized-map/src/lib.rs b/serialized-map/src/lib.rs
new file mode 100644
index 0000000..4b026bd
--- /dev/null
+++ b/serialized-map/src/lib.rs
@@ -0,0 +1,132 @@
+use std::collections::BTreeMap;
+pub type ByteByteMap = BTreeMap<Vec<u8>, Vec<u8>>;
+
+// File format:
+// 0xa8 0x32 (magic number)
+// 0x00 (version)
+// 0x03 (header entries)
+// (header entries)
+// 0x08 (main entries)
+
+// 0x0
+
+
+#[derive(thiserror::Error, Debug, PartialEq, Eq)]
+pub enum DeserializeError {
+    #[error("Bad magic number")]
+    BadMagicNumber,
+
+    #[error("Unexpected end of file")]
+    UnexpectedEof,
+}
+
+// TODO:
+// - [ ] Brotli compression
+// - [ ] Header data (version, format version, flavor, date, etc.)
+// - [x] Magic number +// - [ ] Delta files +// - [ ] Variable-length encoding of length + +pub fn write_length_prefixed( + writer: &mut impl std::io::Write, + value: &[u8], +) -> std::io::Result<()> { + let len = value.len() as u32; + // write the length as a u32 in big endian + let mut len_bytes = [0; 4]; + len_bytes.copy_from_slice(&len.to_be_bytes()); + writer.write_all(&len_bytes)?; + writer.write_all(value)?; + Ok(()) +} + +pub fn read_length_prefixed(reader: &mut impl std::io::Read) -> Result, DeserializeError> { + let mut len_bytes = [0; 4]; + reader.read_exact(&mut len_bytes).map_err(|_| DeserializeError::UnexpectedEof)?; + let len = u32::from_be_bytes(len_bytes); + let mut value = vec![0; len as usize]; + reader.read_exact(&mut value).map_err(|_| DeserializeError::UnexpectedEof)?; + Ok(value) +} + +pub fn serialize(map: &ByteByteMap, mut writer: impl std::io::Write) -> std::io::Result<()> { + let magic = [0xa8, 0x32]; + writer.write_all(&magic)?; + + let len = map.len() as u32; + // write the length as a u32 in big endian + let mut len_bytes = [0; 4]; + len_bytes.copy_from_slice(&len.to_be_bytes()); + writer.write_all(&len_bytes)?; + for (key, value) in map { + write_length_prefixed(&mut writer, key.as_slice())?; + write_length_prefixed(&mut writer, value.as_slice())?; + } + Ok(()) +} + +pub fn serialize_to_bytes(map: &ByteByteMap) -> Vec { + let mut writer = Vec::new(); + serialize(map, &mut writer).unwrap(); + writer +} + +pub fn deserialize(mut reader: impl std::io::Read) -> Result { + let mut magic_buf = [0; 2]; + reader.read_exact(&mut magic_buf).map_err(|_| DeserializeError::UnexpectedEof)?; + + if magic_buf != [0xa8, 0x32] { + return Err(DeserializeError::BadMagicNumber) + } + + let mut map = BTreeMap::new(); + let mut buf = [0; 4]; + reader.read_exact(&mut buf).map_err(|_| DeserializeError::UnexpectedEof)?; + let len = u32::from_be_bytes(buf); + for _ in 0..len { + let key = read_length_prefixed(&mut reader)?; + let value = 
read_length_prefixed(&mut reader)?; + map.insert(key, value); + } + Ok(map) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_map() { + let map: ByteByteMap = BTreeMap::new(); + let bytes = serialize_to_bytes(&map); + assert_eq!(deserialize(bytes.as_slice()).unwrap(), map); + } + + #[test] + fn single_entry() { + let map = BTreeMap::from([(b"foo".to_vec(), b"bar".to_vec())]); + let bytes = serialize_to_bytes(&map); + assert_eq!(deserialize(bytes.as_slice()).unwrap(), map); + } + + #[test] + fn multiple_entries() { + let map = BTreeMap::from([ + (b"foo".to_vec(), b"bar".to_vec()), + (b"baz".to_vec(), b"qux".to_vec()), + ]); + + let bytes = serialize_to_bytes(&map); + assert_eq!(deserialize(bytes.as_slice()).unwrap(), map); + } + + #[test] + fn bad_magic_number() { + let c: Vec = b"1234".to_vec(); + let d: &[u8] = &c; + + let result = deserialize(d); + + assert_eq!(result.unwrap_err(), DeserializeError::BadMagicNumber); + } +}