diff --git a/.github/workflows/py-bindings.yml b/.github/workflows/py-bindings.yml
new file mode 100644
index 00000000..8db03785
--- /dev/null
+++ b/.github/workflows/py-bindings.yml
@@ -0,0 +1,108 @@
+# This file is based on the output of:
+#
+#    maturin generate-ci github --pytest -m bindings/python/Cargo.toml
+#
+# Using maturin v1.4.0
+name: Py-bindings
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+permissions:
+  contents: read
+
+jobs:
+  linux:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        target: [x86_64, x86, aarch64, armv7, s390x, ppc64le]
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Build wheels
+        uses: PyO3/maturin-action@v1
+        with:
+          target: ${{ matrix.target }}
+          args: --release --out dist --find-interpreter --manifest-path bindings/python/Cargo.toml
+          sccache: 'true'
+          manylinux: auto
+      - name: Upload wheels
+        uses: actions/upload-artifact@v3
+        with:
+          name: wheels
+          path: dist
+      - name: pytest
+        if: ${{ startsWith(matrix.target, 'x86_64') }}
+        shell: bash
+        run: |
+          set -e
+          pip install mla-archive --find-links dist --force-reinstall
+          pip install pytest
+          cd bindings/python && pytest
+
+  windows:
+    runs-on: windows-latest
+    strategy:
+      matrix:
+        target: [x64, x86]
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+          architecture: ${{ matrix.target }}
+      - name: Build wheels
+        uses: PyO3/maturin-action@v1
+        with:
+          target: ${{ matrix.target }}
+          args: --release --out dist --find-interpreter --manifest-path bindings/python/Cargo.toml
+          sccache: 'true'
+      - name: Upload wheels
+        uses: actions/upload-artifact@v3
+        with:
+          name: wheels
+          path: dist
+      - name: pytest
+        shell: bash
+        run: |
+          set -e
+          pip install mla-archive --find-links dist --force-reinstall
+          pip install pytest
+          cd bindings/python && pytest
+
+  macos:
+    runs-on: macos-latest
+    strategy:
+      matrix:
+        target: [x86_64, aarch64]
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Build wheels
+        uses: PyO3/maturin-action@v1
+        with:
+          target: ${{ matrix.target }}
+          args: --release --out dist --find-interpreter --manifest-path bindings/python/Cargo.toml
+          sccache: 'true'
+      - name: Upload wheels
+        uses: actions/upload-artifact@v3
+        with:
+          name: wheels
+          path: dist
+      - name: pytest
+        if: ${{ !startsWith(matrix.target, 'aarch64') }}
+        shell: bash
+        run: |
+          set -e
+          pip install mla-archive --find-links dist --force-reinstall
+          pip install pytest
+          cd bindings/python && pytest
+
diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml
new file mode 100644
index 00000000..a21f1908
--- /dev/null
+++ b/bindings/python/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "pymla"
+version = "0.1.0"
+edition = "2021"
+authors = ["Camille Mougey "]
+license = "LGPL-3.0-only"
+description = "Multi Layer Archive - A pure rust encrypted and compressed archive file format"
+homepage = "https://github.com/ANSSI-FR/MLA"
+repository = "https://github.com/ANSSI-FR/MLA"
+readme = "../../README.md"
+
+# Avoid cargo feature unification, which might break other builds in the workspace
+[workspace]
+members = ["."]
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+[lib]
+name = "pymla"
+crate-type = ["cdylib"]
+
+[dependencies]
+pyo3 = "0.20"
+mla = { version = "1", features = ["send"], path = "../../mla"}
+x25519-dalek = "2"
+curve25519-parser = { path = "../../curve25519-parser", version = "0.4" }
diff --git a/bindings/python/LICENSE.md b/bindings/python/LICENSE.md
new file mode 100644
index 00000000..0a041280
--- /dev/null
+++ b/bindings/python/LICENSE.md
@@ -0,0 +1,165 @@
+                   GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. 
+ + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml
new file mode 100644
index 00000000..b69ad5d3
--- /dev/null
+++ b/bindings/python/pyproject.toml
@@ -0,0 +1,27 @@
+[build-system]
+requires = ["maturin>=1.4,<2.0"]
+build-backend = "maturin"
+
+[project]
+name = "mla-archive"
+description = "Bindings for MLA Archive manipulation"
+authors = [
+  { name="Mougey Camille", email="camille.mougey@ssi.gouv.fr" },
+]
+requires-python = ">=3.8"
+keywords = ["archive", "mla"]
+license = {file = "LICENSE.md"}
+classifiers = [
+  "Programming Language :: Rust",
+  "Programming Language :: Python :: Implementation :: CPython",
+  "Programming Language :: Python :: Implementation :: PyPy",
+]
+dynamic = ["version"]
+
+[project.urls]
+documentation = "https://github.com/ANSSI-FR/MLA"
+repository = "https://github.com/ANSSI-FR/MLA"
+
+[tool.maturin]
+features = ["pyo3/extension-module"]
+module-name = "mla"
diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs
new file mode 100644
index 00000000..1b3525c1
--- /dev/null
+++ b/bindings/python/src/lib.rs
@@ -0,0 +1,1010 @@
+use std::{
+    borrow::Cow,
+    collections::HashMap,
+    fs::File,
+    io::{self, Read},
+};
+
+use curve25519_parser::{parse_openssl_25519_privkey, parse_openssl_25519_pubkey};
+use mla::{
+    config::{ArchiveReaderConfig, ArchiveWriterConfig},
+    ArchiveReader, ArchiveWriter, Layers,
+};
+use pyo3::{
+    create_exception,
+    exceptions::{PyKeyError, PyRuntimeError, PyTypeError},
+    prelude::*,
+    types::{PyBytes, PyString, PyTuple, PyType},
+};
+
+// -------- Error handling --------
+
+/// Wrapper over the MLA custom error type, due to the "orphan rule"
+/// - WrappedMLA: MLA-specific errors
+/// - WrappedPy: Python-related errors
+#[derive(Debug)]
+enum WrappedError {
+    WrappedMLA(mla::errors::Error),
+    WrappedPy(PyErr),
+}
+
+// Add a dedicated MLA exception (mla.MLAError) and its associated sub-exceptions
+// IOError and AssertionError are not mapped, as they already map to Python exceptions
+create_exception!(mla, MLAError, pyo3::exceptions::PyException); +create_exception!(mla, WrongMagic, MLAError, "Wrong magic, must be \"MLA\""); +create_exception!( + mla, + UnsupportedVersion, + MLAError, + "Unsupported version, must be 1" +); +create_exception!( + mla, + InvalidECCKeyFormat, + MLAError, + "Supplied ECC key is not in the expected format" +); +create_exception!(mla, WrongBlockSubFileType, MLAError, "Wrong BlockSubFile magic has been encountered. Is the deserializion tarting at the beginning of a block?"); +create_exception!( + mla, + UTF8ConversionError, + MLAError, + "An error has occurred while converting into UTF8. This error could" +); +create_exception!( + mla, + FilenameTooLong, + MLAError, + "Filenames have a limited size `FILENAME_MAX_SIZE`" +); +create_exception!( + mla, + WrongArchiveWriterState, + MLAError, + "The writer state is not in the expected state for the current operation" +); +create_exception!( + mla, + WrongReaderState, + MLAError, + "The reader state is not in the expected state for the current operation" +); +create_exception!( + mla, + WrongWriterState, + MLAError, + "The writer state is not in the expected state for the current operation" +); +create_exception!( + mla, + RandError, + MLAError, + "Error with the inner random generator" +); +create_exception!( + mla, + PrivateKeyNeeded, + MLAError, + "A Private Key is required to decrypt the encrypted cipher key" +); +create_exception!( + mla, + DeserializationError, + MLAError, + "Deserialization error. May happens when starting from a wrong offset / version mismatch" +); +create_exception!( + mla, + SerializationError, + MLAError, + "Serialization error. 
May happens on I/O errors" +); +create_exception!(mla, MissingMetadata, MLAError, "Missing metadata (usually means the footer has not been correctly read, a repair might be needed)"); +create_exception!( + mla, + BadAPIArgument, + MLAError, + "Error returned on API call with incorrect argument" +); +create_exception!( + mla, + EndOfStream, + MLAError, + "End of stream reached, no more data should be expected" +); +create_exception!( + mla, + ConfigError, + MLAError, + "An error happens in the configuration" +); +create_exception!(mla, DuplicateFilename, MLAError, "Filename already used"); +create_exception!( + mla, + AuthenticatedDecryptionWrongTag, + MLAError, + "Wrong tag while decrypting authenticated data" +); +create_exception!( + mla, + HKDFInvalidKeyLength, + MLAError, + "Unable to expand while using the HKDF" +); + +// Convert potentials errors to the wrapped type + +impl From for WrappedError { + fn from(err: mla::errors::Error) -> Self { + WrappedError::WrappedMLA(err) + } +} + +impl From for WrappedError { + fn from(err: mla::errors::ConfigError) -> Self { + WrappedError::WrappedMLA(mla::errors::Error::ConfigError(err)) + } +} + +impl From for WrappedError { + fn from(err: std::io::Error) -> Self { + WrappedError::WrappedPy(err.into()) + } +} + +impl From for WrappedError { + fn from(err: PyErr) -> Self { + WrappedError::WrappedPy(err) + } +} + +/// Convert back the wrapped type to Python errors +impl From for PyErr { + fn from(err: WrappedError) -> PyErr { + match err { + WrappedError::WrappedMLA(inner_err) => { + match inner_err { + mla::errors::Error::IOError(err) => PyErr::new::(err), + mla::errors::Error::AssertionError(msg) => PyErr::new::(msg), + mla::errors::Error::WrongMagic => PyErr::new::("Wrong magic, must be \"MLA\""), + mla::errors::Error::UnsupportedVersion => PyErr::new::("Unsupported version, must be 1"), + mla::errors::Error::InvalidECCKeyFormat => PyErr::new::("Supplied ECC key is not in the expected format"), + 
mla::errors::Error::WrongBlockSubFileType => PyErr::new::("Wrong BlockSubFile magic has been encountered. Is the deserializion tarting at the beginning of a block?"), + mla::errors::Error::UTF8ConversionError(err) => PyErr::new::(err), + mla::errors::Error::FilenameTooLong => PyErr::new::("Filenames have a limited size `FILENAME_MAX_SIZE`"), + mla::errors::Error::WrongArchiveWriterState { current_state, expected_state } => PyErr::new::(format!("The writer state is not in the expected state for the current operation. Current state: {:?}, expected state: {:?}", current_state, expected_state)), + mla::errors::Error::WrongReaderState(msg) => PyErr::new::(msg), + mla::errors::Error::WrongWriterState(msg) => PyErr::new::(msg), + mla::errors::Error::RandError(err) => PyErr::new::(format!("{:}", err)), + mla::errors::Error::PrivateKeyNeeded => PyErr::new::("A Private Key is required to decrypt the encrypted cipher key"), + mla::errors::Error::DeserializationError => PyErr::new::("Deserialization error. May happens when starting from a wrong offset / version mismatch"), + mla::errors::Error::SerializationError => PyErr::new::("Serialization error. 
May happens on I/O errors"), + mla::errors::Error::MissingMetadata => PyErr::new::("Missing metadata (usually means the footer has not been correctly read, a repair might be needed)"), + mla::errors::Error::BadAPIArgument(msg) => PyErr::new::(msg), + mla::errors::Error::EndOfStream => PyErr::new::("End of stream reached, no more data should be expected"), + mla::errors::Error::ConfigError(err) => PyErr::new::(format!("{:}", err)), + mla::errors::Error::DuplicateFilename => PyErr::new::("Filename already used"), + mla::errors::Error::AuthenticatedDecryptionWrongTag => PyErr::new::("Wrong tag while decrypting authenticated data"), + mla::errors::Error::HKDFInvalidKeyLength => PyErr::new::("Unable to expand while using the HKDF"), + } + }, + WrappedError::WrappedPy(inner_err) => inner_err + } + } +} +// -------- mla.FileMetadata -------- + +#[pyclass] +struct FileMetadata { + size: Option, + hash: Option<[u8; 32]>, +} + +#[pymethods] +impl FileMetadata { + #[getter] + fn size(&self) -> Option { + self.size + } + + #[getter] + fn hash(&self) -> Option> { + self.hash.as_ref().map(|h| Cow::Borrowed::<[u8]>(h)) + } + + fn __repr__(&self) -> String { + format!("", self.size, self.hash) + } +} + +// -------- mla.PublicKeys -------- + +/// Represents multiple ECC Public Keys +/// +/// Instanciate with path (as string) or data (as bytes) +/// PEM and DER format are supported +/// +/// Example: +/// ```python +/// pkeys = PublicKeys("/path/to/key.pem", b""" +/// -----BEGIN PUBLIC KEY----- +/// ... 
+/// -----END PUBLIC KEY-----
+/// """)
+/// ```
+#[derive(Clone)]
+#[pyclass]
+struct PublicKeys {
+    keys: Vec<x25519_dalek::PublicKey>,
+}
+
+#[pymethods]
+impl PublicKeys {
+    #[new]
+    #[pyo3(signature = (*args))]
+    fn new(args: &PyTuple) -> Result<Self, WrappedError> {
+        let mut keys = Vec::new();
+
+        for element in args {
+            // String argument: this is a path
+            // "/path/to/public.pem"
+            if let Ok(path) = element.downcast::<PyString>() {
+                let mut file = File::open(path.to_string())?;
+                // Load the ECC key in-memory and parse it
+                let mut buf = Vec::new();
+                file.read_to_end(&mut buf)?;
+                keys.push(
+                    parse_openssl_25519_pubkey(&buf)
+                        .map_err(|_| mla::errors::Error::InvalidECCKeyFormat)?,
+                );
+            } else if let Ok(data) = element.downcast::<PyBytes>() {
+                keys.push(
+                    parse_openssl_25519_pubkey(data.as_bytes())
+                        .map_err(|_| mla::errors::Error::InvalidECCKeyFormat)?,
+                );
+            } else {
+                return Err(
+                    PyTypeError::new_err("Expect a path (as a string) or data (as bytes)").into(),
+                );
+            }
+        }
+        Ok(Self { keys })
+    }
+
+    /// DER representation of keys
+    #[getter]
+    fn keys(&self) -> Vec<Cow<[u8]>> {
+        self.keys
+            .iter()
+            .map(|pubkey| Cow::Owned(Vec::from(pubkey.to_bytes())))
+            .collect()
+    }
+}
+
+// -------- mla.PrivateKeys --------
+
+/// Represents multiple ECC Private Keys
+///
+/// Instantiate with path (as string) or data (as bytes)
+/// PEM and DER format are supported
+///
+/// Example:
+/// ```python
+/// pkeys = PrivateKeys("/path/to/key.pem", b"""
+/// -----BEGIN PRIVATE KEY-----
+/// ...
+/// -----END PRIVATE KEY-----
+/// """)
+/// ```
+#[derive(Clone)]
+#[pyclass]
+struct PrivateKeys {
+    keys: Vec<x25519_dalek::StaticSecret>,
+}
+
+#[pymethods]
+impl PrivateKeys {
+    #[new]
+    #[pyo3(signature = (*args))]
+    fn new(args: &PyTuple) -> Result<Self, WrappedError> {
+        let mut keys = Vec::new();
+
+        for element in args {
+            // String argument: this is a path
+            // "/path/to/private.pem"
+            if let Ok(path) = element.downcast::<PyString>() {
+                let mut file = File::open(path.to_string())?;
+                // Load the ECC key in-memory and parse it
+                let mut buf = Vec::new();
+                file.read_to_end(&mut buf)?;
+                keys.push(
+                    parse_openssl_25519_privkey(&buf)
+                        .map_err(|_| mla::errors::Error::InvalidECCKeyFormat)?,
+                );
+            } else if let Ok(data) = element.downcast::<PyBytes>() {
+                keys.push(
+                    parse_openssl_25519_privkey(data.as_bytes())
+                        .map_err(|_| mla::errors::Error::InvalidECCKeyFormat)?,
+                );
+            } else {
+                return Err(
+                    PyTypeError::new_err("Expect a path (as a string) or data (as bytes)").into(),
+                );
+            }
+        }
+        Ok(Self { keys })
+    }
+
+    /// DER representation of keys
+    /// :warning: These keys must be kept secret!
+    #[getter]
+    fn keys(&self) -> Vec<Cow<[u8]>> {
+        self.keys
+            .iter()
+            .map(|privkey| Cow::Owned(Vec::from(privkey.to_bytes())))
+            .collect()
+    }
+}
+
+// -------- mla.ConfigWriter --------
+
+// from mla::layers::DEFAULT_COMPRESSION_LEVEL
+const DEFAULT_COMPRESSION_LEVEL: u32 = 5;
+
+// This class keeps the values of configured object, and can be used to produce an actual
+// `ArchiveWriterConfig`. 
That way, it can be used to produced many of them, as they are +// consumed during the `ArchiveWriter` init (to avoid reusing cryptographic materials) +#[pyclass] +struct WriterConfig { + layers: Layers, + compression_level: u32, + public_keys: Option, +} + +#[pymethods] +impl WriterConfig { + #[new] + #[pyo3(signature = (layers=None, compression_level=DEFAULT_COMPRESSION_LEVEL, public_keys=None))] + fn new( + layers: Option, + compression_level: u32, + public_keys: Option, + ) -> Result { + // Check parameters + let layers = match layers { + Some(layers_enabled) => Layers::from_bits(layers_enabled).ok_or( + mla::errors::Error::BadAPIArgument("Unknown layers".to_string()), + )?, + None => Layers::DEFAULT, + }; + + // Check compression level is correct using a fake object + ArchiveWriterConfig::new().with_compression_level(compression_level)?; + + Ok(WriterConfig { + layers, + compression_level, + public_keys, + }) + } + + #[getter] + fn layers(&self) -> u8 { + self.layers.bits() + } + + /// Enable a layer + fn enable_layer(mut slf: PyRefMut, layer: u8) -> Result, WrappedError> { + let layer = Layers::from_bits(layer).ok_or(mla::errors::Error::BadAPIArgument( + "Unknown layer".to_string(), + ))?; + slf.layers |= layer; + Ok(slf) + } + + /// Disable a layer + fn disable_layer(mut slf: PyRefMut, layer: u8) -> Result, WrappedError> { + let layer = Layers::from_bits(layer).ok_or(mla::errors::Error::BadAPIArgument( + "Unknown layer".to_string(), + ))?; + slf.layers &= !layer; + Ok(slf) + } + + /// Set several layers at once + fn set_layers(mut slf: PyRefMut, layers: u8) -> Result, WrappedError> { + slf.layers = Layers::from_bits(layers).ok_or(mla::errors::Error::BadAPIArgument( + "Unknown layer".to_string(), + ))?; + Ok(slf) + } + + /// Set the compression level + /// compression level (0-11); bigger values cause denser, but slower compression + fn with_compression_level( + mut slf: PyRefMut, + compression_level: u32, + ) -> Result, WrappedError> { + // Check compression 
level is correct using a fake object
+        ArchiveWriterConfig::new().with_compression_level(compression_level)?;
+
+        slf.compression_level = compression_level;
+        Ok(slf)
+    }
+
+    #[getter]
+    fn compression_level(&self) -> u32 {
+        self.compression_level
+    }
+
+    /// Set public keys
+    fn set_public_keys(
+        mut slf: PyRefMut<Self>,
+        public_keys: PublicKeys,
+    ) -> Result<PyRefMut<Self>, WrappedError> {
+        slf.public_keys = Some(public_keys);
+        Ok(slf)
+    }
+
+    #[getter]
+    fn public_keys(&self) -> Option<PublicKeys> {
+        self.public_keys.clone()
+    }
+}
+
+impl WriterConfig {
+    /// Create an `ArchiveWriterConfig` out of the python object
+    fn to_archive_writer_config(&self) -> Result<ArchiveWriterConfig, WrappedError> {
+        let mut config = ArchiveWriterConfig::new();
+        config.set_layers(self.layers);
+        config.with_compression_level(self.compression_level)?;
+        if let Some(ref public_keys) = self.public_keys {
+            config.add_public_keys(&public_keys.keys);
+        }
+        Ok(config)
+    }
+}
+
+// -------- mla.ConfigReader --------
+
+// This class keeps the values of configured object, and can be used to produce an actual
+// `ArchiveReaderConfig`. 
That way, it can be used to produce many of them, as they are
+// consumed during the `ArchiveReader` init
+#[pyclass]
+struct ReaderConfig {
+    private_keys: Option<PrivateKeys>,
+}
+
+#[pymethods]
+impl ReaderConfig {
+    #[new]
+    #[pyo3(signature = (private_keys=None))]
+    fn new(private_keys: Option<PrivateKeys>) -> Self {
+        ReaderConfig { private_keys }
+    }
+
+    /// Set private keys
+    fn set_private_keys(
+        mut slf: PyRefMut<Self>,
+        private_keys: PrivateKeys,
+    ) -> Result<PyRefMut<Self>, WrappedError> {
+        slf.private_keys = Some(private_keys);
+        Ok(slf)
+    }
+
+    #[getter]
+    fn private_keys(&self) -> Option<PrivateKeys> {
+        self.private_keys.clone()
+    }
+}
+
+impl ReaderConfig {
+    /// Create an `ArchiveReaderConfig` out of the python object
+    fn to_archive_reader_config(&self) -> Result<ArchiveReaderConfig, WrappedError> {
+        let mut config = ArchiveReaderConfig::new();
+        if let Some(ref private_keys) = self.private_keys {
+            config.add_private_keys(&private_keys.keys);
+            config.layers_enabled |= Layers::ENCRYPT;
+        }
+        Ok(config)
+    }
+}
+
+// -------- mla.MLAFile --------
+
+/// `ArchiveWriter` is a generic type. To avoid generating several Python implementations
+/// (see https://pyo3.rs/v0.20.2/class.html#no-generic-parameters), this enum explicitly
+/// instantiates `ArchiveWriter` for common & expected types
+///
+/// Additionally, as the GC in Python might drop objects at any time, we need to use
+/// `'static` lifetime for the writer. 
This should not be a problem as the writer is not
+/// supposed to be used after the drop of the parent object
+/// (see https://pyo3.rs/v0.20.2/class.html#no-lifetime-parameters)
+enum ExplicitWriters {
+    FileWriter(ArchiveWriter<'static, std::fs::File>),
+}
+
+/// Wrap calls to the inner type
+impl ExplicitWriters {
+    fn finalize(&mut self) -> Result<(), mla::errors::Error> {
+        match self {
+            ExplicitWriters::FileWriter(writer) => {
+                writer.finalize()?;
+                Ok(())
+            }
+        }
+    }
+
+    fn add_file<R: Read>(
+        &mut self,
+        key: &str,
+        size: u64,
+        reader: &mut R,
+    ) -> Result<(), mla::errors::Error> {
+        match self {
+            ExplicitWriters::FileWriter(writer) => {
+                writer.add_file(key, size, reader)?;
+                Ok(())
+            }
+        }
+    }
+
+    fn start_file(&mut self, key: &str) -> Result<u64, mla::errors::Error> {
+        match self {
+            ExplicitWriters::FileWriter(writer) => writer.start_file(key),
+        }
+    }
+
+    fn append_file_content(
+        &mut self,
+        id: u64,
+        size: usize,
+        data: &[u8],
+    ) -> Result<(), mla::errors::Error> {
+        match self {
+            ExplicitWriters::FileWriter(writer) => {
+                writer.append_file_content(id, size as u64, data)
+            }
+        }
+    }
+
+    fn end_file(&mut self, id: u64) -> Result<(), mla::errors::Error> {
+        match self {
+            ExplicitWriters::FileWriter(writer) => writer.end_file(id),
+        }
+    }
+}
+
+/// See `ExplicitWriters` for details
+enum ExplicitReaders {
+    FileReader(ArchiveReader<'static, std::fs::File>),
+}
+
+/// Wrap calls to the inner type
+impl ExplicitReaders {
+    fn list_files(&self) -> Result<impl Iterator<Item = &String>, mla::errors::Error> {
+        match self {
+            ExplicitReaders::FileReader(reader) => reader.list_files(),
+        }
+    }
+}
+
+/// Opening Mode for a MLAFile
+enum OpeningModeInner {
+    Read(ExplicitReaders),
+    Write(ExplicitWriters),
+}
+
+#[pyclass]
+pub struct MLAFile {
+    /// Wrapping over the rust object, depending on the opening mode
+    inner: OpeningModeInner,
+    /// Path of the file, used for messages
+    path: String,
+}
+
+/// Used to check whether the opening mode is the expected one, and unwrap it
+/// return a BadAPI argument 
error if not +/// ```text +/// let inner = check_mode!(self, Read); +/// ``` +macro_rules! check_mode { + ( $self:expr, $x:ident ) => {{ + match &$self.inner { + OpeningModeInner::$x(inner) => inner, + _ => { + return Err(mla::errors::Error::BadAPIArgument(format!( + "This API is only callable in {:} mode", + stringify!($x) + )) + .into()) + } + } + }}; + ( mut $self:expr, $x:ident ) => {{ + match &mut $self.inner { + OpeningModeInner::$x(inner) => inner, + _ => { + return Err(mla::errors::Error::BadAPIArgument(format!( + "This API is only callable in {:} mode", + stringify!($x) + )) + .into()) + } + } + }}; +} + +#[pymethods] +impl MLAFile { + #[new] + #[pyo3(signature = (path, mode="r", config=None))] + fn new(path: &str, mode: &str, config: Option<&PyAny>) -> Result { + match mode { + "r" => { + let rconfig = match config { + Some(config) => { + // Must be a ReaderConfig + config + .extract::>()? + .to_archive_reader_config()? + } + None => ArchiveReaderConfig::new(), + }; + let input_file = std::fs::File::open(path)?; + let arch_reader = ArchiveReader::from_config(input_file, rconfig)?; + Ok(MLAFile { + inner: OpeningModeInner::Read(ExplicitReaders::FileReader(arch_reader)), + path: path.to_string(), + }) + } + "w" => { + let wconfig = match config { + Some(config) => { + // Must be a WriterConfig + config + .extract::>()? + .to_archive_writer_config()? 
+                    }
+                    None => ArchiveWriterConfig::new(),
+                };
+                let output_file = std::fs::File::create(path)?;
+                let arch_writer = ArchiveWriter::from_config(output_file, wconfig)?;
+                Ok(MLAFile {
+                    inner: OpeningModeInner::Write(ExplicitWriters::FileWriter(arch_writer)),
+                    path: path.to_string(),
+                })
+            }
+            _ => Err(mla::errors::Error::BadAPIArgument(format!(
+                "Unknown mode {}, use 'r' or 'w'",
+                mode
+            ))
+            .into()),
+        }
+    }
+
+    fn __repr__(&self) -> String {
+        format!(
+            "<MLAFile path='{:}' mode='{:}'>",
+            self.path,
+            match self.inner {
+                OpeningModeInner::Read(_) => "r",
+                OpeningModeInner::Write(_) => "w",
+            }
+        )
+    }
+
+    /// Return the list of files in the archive
+    fn keys(&self) -> Result<Vec<String>, WrappedError> {
+        let inner = check_mode!(self, Read);
+        Ok(inner.list_files()?.map(|x| x.to_string()).collect())
+    }
+
+    /// Return the list of the files in the archive, along with metadata
+    /// If `include_size` is set, the size will be included in the metadata
+    /// If `include_hash` is set, the hash (SHA256) will be included in the metadata
+    ///
+    /// Example:
+    /// ```python
+    /// metadatas = archive.list_files(include_size=True, include_hash=True)
+    /// for fname, metadata in metadatas.items():
+    ///     print(f"File {fname} has size {metadata.size} and hash {metadata.hash}")
+    /// ```
+    #[pyo3(signature = (include_size=false, include_hash=false))]
+    fn list_files(
+        &mut self,
+        include_size: bool,
+        include_hash: bool,
+    ) -> Result<HashMap<String, FileMetadata>, WrappedError> {
+        let inner = check_mode!(mut self, Read);
+
+        let mut output = HashMap::new();
+        let iter: Vec<String> = inner.list_files()?.cloned().collect();
+        for fname in iter {
+            let mut metadata = FileMetadata {
+                size: None,
+                hash: None,
+            };
+            match inner {
+                ExplicitReaders::FileReader(mla) => {
+                    if include_size {
+                        metadata.size = Some(
+                            mla.get_file(fname.clone())?
+                                .ok_or(PyRuntimeError::new_err(format!(
+                                    "File {} not found",
+                                    fname
+                                )))?
+                                .size,
+                        );
+                    }
+                    if include_hash {
+                        metadata.hash = Some(
+                            mla.get_hash(&fname)?
+ .ok_or(PyRuntimeError::new_err(format!( + "File {} not found", + fname + )))?, + ); + } + } + } + output.insert(fname.to_string(), metadata); + } + Ok(output) + } + + /// Return whether the file is in the archive + fn __contains__(&self, key: &str) -> Result { + let inner = check_mode!(self, Read); + Ok(inner.list_files()?.any(|x| x == key)) + } + + /// Return the content of a file as bytes + fn __getitem__(&mut self, key: &str) -> Result, WrappedError> { + let inner = check_mode!(mut self, Read); + match inner { + ExplicitReaders::FileReader(reader) => { + let mut buf = Vec::new(); + let file = reader.get_file(key.to_string())?; + if let Some(mut archive_file) = file { + archive_file.data.read_to_end(&mut buf)?; + Ok(Cow::Owned(buf)) + } else { + Err(PyKeyError::new_err(format!("File {} not found", key)).into()) + } + } + } + } + + /// Add a file to the archive + fn __setitem__(&mut self, key: &str, value: &[u8]) -> Result<(), WrappedError> { + let writer = check_mode!(mut self, Write); + match writer { + ExplicitWriters::FileWriter(writer) => { + let mut reader = std::io::Cursor::new(value); + writer.add_file(key, value.len() as u64, &mut reader)?; + Ok(()) + } + } + } + + /// Return the number of file in the archive + fn __len__(&self) -> Result { + let inner = check_mode!(self, Read); + Ok(inner.list_files()?.count()) + } + + /// Finalize the archive creation. This API *must* be called or essential records will no be written + /// An archive can only be finalized once + fn finalize(&mut self) -> Result<(), WrappedError> { + let inner = check_mode!(mut self, Write); + Ok(inner.finalize()?) 
+ } + + // Context management protocol (PEP 0343) + // https://docs.python.org/3/reference/datamodel.html#context-managers + fn __enter__(slf: PyRef) -> PyRef { + slf + } + + // On a clean exit a writer is finalized; the returned `false` never suppresses an exception + fn __exit__( + &mut self, + exc_type: Option<&PyAny>, + _exc_value: Option<&PyAny>, + _traceback: Option<&PyAny>, + ) -> Result { + if exc_type.is_some() { + // An exception occurred, let it be raised again + return Ok(false); + } + + match self.inner { + OpeningModeInner::Read(_) => { + // Nothing to do, dropping this object should close the inner stream + } + OpeningModeInner::Write(ref mut writer) => { + // Finalize. If an exception occurred, raise it + writer.finalize()?; + } + } + Ok(false) + } + + /// alias for io.BufferedIOBase + // Purpose: only one import + #[classattr] + fn _buffered_type(py: Python) -> Result<&PyType, WrappedError> { + Ok(py.import("io")?.getattr("BufferedIOBase")?.extract()?) + } + + /// Write an archive file to @dest, which can be: + /// - a string, corresponding to the output path + /// - a writable BufferedIOBase object (file-object like) + /// If a BufferedIOBase object is provided, the size of the chunk passed to `.write` can be adjusted + /// through @chunk_size (default to 4MB), which must be greater than 0 + /// + /// Raises `KeyError` if @key is not in the archive, `TypeError` if @dest is neither a string + /// nor a BufferedIOBase object, and `BadAPIArgument` if @chunk_size is 0 + /// + /// Example: + /// ```python + /// with open("/path/to/extract/file1", "wb") as f: + /// archive.write_file_to("file1", f) + /// ``` + /// Or + /// ```python + /// archive.write_file_to("file1", "/path/to/extract/file1") + /// ``` + #[pyo3(signature = (key, dest, chunk_size=4194304))] + fn write_file_to( + &mut self, + py: Python, + key: &str, + dest: &PyAny, + chunk_size: usize, + ) -> Result<(), WrappedError> { + let reader = check_mode!(mut self, Read); + + // Resolve the entry first: an unknown key raises `KeyError` (as `__getitem__` does) + // instead of panicking on `unwrap`, and no output file gets created for it + let mut archive_file = match reader { + ExplicitReaders::FileReader(reader) => reader.get_file(key.to_string())?, + } + .ok_or_else(|| PyKeyError::new_err(format!("File {} not found", key)))?; + + if let Ok(dest) = dest.downcast::() { + // dest is a String, this is a path + // `/path/to/dest` + let mut output = std::fs::File::create(dest.to_string())?; + io::copy(&mut archive_file.data,
&mut output)?; + } else if dest.is_instance(py.get_type::().getattr("_buffered_type")?)? { + // isinstance(dest, io.BufferedIOBase) + // offer `.write` (`.close` must be called from the caller) + + if chunk_size == 0 { + // With an empty buffer `read` returns 0 at once: nothing would be written, silently + return Err(mla::errors::Error::BadAPIArgument( + "chunk_size must be greater than 0".to_string(), + ) + .into()); + } + let src = &mut archive_file.data; + let mut buf = vec![0u8; chunk_size]; + loop { + // Propagate read errors instead of stopping early with a truncated output + let n = src.read(&mut buf)?; + if n == 0 { + break; + } + dest.call_method1("write", (&buf[..n],))?; + } + } else { + return Err(PyTypeError::new_err( + "Expected a string or a file-object like (subclass of io.BufferedIOBase)", + ) + .into()); + } + Ok(()) + } + + /// Add a file to an archive from @src, which can be: + /// - a string, corresponding to the input path + /// - a readable BufferedIOBase object (file-object like) + /// If a BufferedIOBase object is provided, the size of the chunk passed to `.read` can be adjusted + /// through @chunk_size (default to 4MB), which must be greater than 0 + /// + /// Example: + /// ```python + /// archive.add_file_from("file1", "/path/to/file1") + /// ``` + /// Or + /// ```python + /// with open("/path/to/file1", "rb") as f: + /// archive.add_file_from("file1", f) + /// ``` + #[pyo3(signature = (key, src, chunk_size=4194304))] + fn add_file_from( + &mut self, + py: Python, + key: &str, + src: &PyAny, + chunk_size: usize, + ) -> Result<(), WrappedError> { + let writer = check_mode!(mut self, Write); + + if let Ok(src) = src.downcast::() { + // src is a String, this is a path + // `/path/to/src` + let mut input = std::fs::File::open(src.to_string())?; + writer.add_file(key, input.metadata()?.len(), &mut input)?; + } else if src.is_instance(py.get_type::().getattr("_buffered_type")?)? { + // isinstance(src, io.BufferedIOBase) + // offer `.read` (`.close` must be called from the caller) + + if chunk_size == 0 { + // `read(0)` returns empty bytes at once: the entry would silently be added empty + return Err(mla::errors::Error::BadAPIArgument( + "chunk_size must be greater than 0".to_string(), + ) + .into()); + } + let id = writer.start_file(key)?; + loop { + let data = src + .call_method1("read", (chunk_size,))? + .extract::<&PyBytes>()?
+ .as_bytes(); + if data.is_empty() { + break; + } + writer.append_file_content(id, data.len(), data)?; + } + writer.end_file(id)?; + } else { + return Err(PyTypeError::new_err( + "Expected a string or a file-object like (subclass of io.RawIOBase)", + ) + .into()); + } + Ok(()) + } +} + +// -------- Python module instanciation -------- + +/// Instanciate the Python module +#[pymodule] +#[pyo3(name = "mla")] +fn pymla(py: Python, m: &PyModule) -> PyResult<()> { + // Classes + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + // Exceptions + m.add("MLAError", py.get_type::())?; + m.add("WrongMagic", py.get_type::())?; + m.add("UnsupportedVersion", py.get_type::())?; + m.add("InvalidECCKeyFormat", py.get_type::())?; + m.add( + "WrongBlockSubFileType", + py.get_type::(), + )?; + m.add("UTF8ConversionError", py.get_type::())?; + m.add("FilenameTooLong", py.get_type::())?; + m.add( + "WrongArchiveWriterState", + py.get_type::(), + )?; + m.add("WrongReaderState", py.get_type::())?; + m.add("WrongWriterState", py.get_type::())?; + m.add("RandError", py.get_type::())?; + m.add("PrivateKeyNeeded", py.get_type::())?; + m.add( + "DeserializationError", + py.get_type::(), + )?; + m.add("SerializationError", py.get_type::())?; + m.add("MissingMetadata", py.get_type::())?; + m.add("BadAPIArgument", py.get_type::())?; + m.add("EndOfStream", py.get_type::())?; + m.add("ConfigError", py.get_type::())?; + m.add("DuplicateFilename", py.get_type::())?; + m.add( + "AuthenticatedDecryptionWrongTag", + py.get_type::(), + )?; + m.add( + "HKDFInvalidKeyLength", + py.get_type::(), + )?; + + // Add constants + m.add("LAYER_COMPRESS", Layers::COMPRESS.bits())?; + m.add("LAYER_ENCRYPT", Layers::ENCRYPT.bits())?; + m.add("LAYER_DEFAULT", Layers::DEFAULT.bits())?; + m.add("LAYER_EMPTY", Layers::EMPTY.bits())?; + m.add("DEFAULT_COMPRESSION_LEVEL", DEFAULT_COMPRESSION_LEVEL)?; + Ok(()) +} diff --git 
a/bindings/python/tests/test_mla.py b/bindings/python/tests/test_mla.py new file mode 100644 index 00000000..80cd530d --- /dev/null +++ b/bindings/python/tests/test_mla.py @@ -0,0 +1,570 @@ +import hashlib +import pytest +import tempfile +import os +import io + +import mla +from mla import MLAFile, MLAError + +# Test data +FILES = { + "file1": b"DATA1", + "file2": b"DATA_2", +} + +@pytest.fixture +def basic_archive(): + "Create a temporary archive and return its path" + fname = tempfile.mkstemp(suffix=".mla")[1]  # NOTE(review): mkstemp returns (fd, path); only the path is kept, the fd is never closed + archive = MLAFile(fname, "w") + for name, data in FILES.items(): + archive[name] = data + archive.finalize() + return fname + +def test_layers_bitflag_export(): + assert mla.LAYER_DEFAULT == mla.LAYER_COMPRESS | mla.LAYER_ENCRYPT + assert mla.LAYER_EMPTY == 0 + assert mla.LAYER_DEFAULT != mla.LAYER_EMPTY + +def test_bad_mode(): + "Ensure MLAFile with an unknown mode raise an error" + target_file = "/tmp/must_not_exists" + with pytest.raises(mla.BadAPIArgument): + MLAFile(target_file, "x") + # Ensure the file has not been created + with pytest.raises(FileNotFoundError): + open(target_file) + +def test_repr(): + "Ensure the repr is correct" + path = tempfile.mkstemp(suffix=".mla")[1] + archive = MLAFile(path, "w") + assert repr(archive) == "" % path + archive.finalize() + +def test_forbidden_in_write_mode(): + "Ensure some API cannot be called in write mode" + archive = MLAFile(tempfile.mkstemp(suffix=".mla")[1], "w") + + # .keys + with pytest.raises(mla.BadAPIArgument): + archive.keys() + + # __contains__ + with pytest.raises(mla.BadAPIArgument): + "name" in archive + + # __getitem__ + with pytest.raises(mla.BadAPIArgument): + archive["name"] + + # __len__ + with pytest.raises(mla.BadAPIArgument): + len(archive) + + # list_files + with pytest.raises(mla.BadAPIArgument): + archive.list_files() + +def test_forbidden_in_read_mode(basic_archive): + "Ensure some API cannot be called in read mode" + archive = MLAFile(basic_archive) + + # __setitem__ + with
pytest.raises(mla.BadAPIArgument): + archive["file"] = b"data" + + # .finalize + with pytest.raises(mla.BadAPIArgument): + archive.finalize() + +def test_read_api(basic_archive): + "Test basic read APIs" + archive = MLAFile(basic_archive) + + # .keys + assert sorted(archive.keys()) == sorted(list(FILES.keys())) + + # __contains__ + assert "file1" in archive + assert "file3" not in archive + + # __getitem__ + assert archive["file1"] == FILES["file1"] + assert archive["file2"] == FILES["file2"] + with pytest.raises(KeyError): + archive["file3"] + + # __len__ + assert len(archive) == 2 + +def test_list_files(basic_archive): + "Test list files possibilities" + archive = MLAFile(basic_archive) + + # Basic + assert sorted(archive.list_files()) == sorted(list(FILES.keys())) + + # With size + assert sorted([ + (filename, info.size) for filename, info in archive.list_files(include_size=True).items() + ]) == sorted([ + (filename, len(data)) for filename, data in FILES.items() + ]) + + # With hash + assert sorted([ + (filename, info.hash) for filename, info in archive.list_files(include_hash=True).items() + ]) == sorted([ + (filename, hashlib.sha256(data).digest()) for filename, data in FILES.items() + ]) + + # With size and hash + assert sorted([ + (filename, info.size, info.hash) for filename, info in archive.list_files(include_size=True, include_hash=True).items() + ]) == sorted([ + (filename, len(data), hashlib.sha256(data).digest()) for filename, data in FILES.items() + ]) + +def test_write_api(): + "Test basic write APIs" + path = tempfile.mkstemp(suffix=".mla")[1] + archive = MLAFile(path, "w") + + # __setitem__ + for name, data in FILES.items(): + archive[name] = data + + # finalize (there is no separate close in write mode) + archive.finalize() + + # Check the resulting archive + archive = MLAFile(path) + assert sorted(archive.keys()) == sorted(list(FILES.keys())) + assert archive["file1"] == FILES["file1"] + assert archive["file2"] == FILES["file2"] + +def test_double_write(): + "Rewriting the file must raise
an MLA error" + archive = MLAFile(tempfile.mkstemp(suffix=".mla")[1], "w") + archive["file1"] = FILES["file1"] + + with pytest.raises(mla.DuplicateFilename): + archive["file1"] = FILES["file1"] + +def test_context_read(basic_archive): + "Test reading using a `with` statement (context management protocol)" + with MLAFile(basic_archive) as mla:  # NOTE: `mla` shadows the imported module inside this test + assert sorted(mla.keys()) == sorted(list(FILES.keys())) + for name, data in FILES.items(): + assert mla[name] == data + +def test_context_write(): + "Test writing using a `with` statement (context management protocol)" + path = tempfile.mkstemp(suffix=".mla")[1] + with MLAFile(path, "w") as mla:  # NOTE: `mla` shadows the imported module inside this test + for name, data in FILES.items(): + mla[name] = data + + # Check the resulting file + with MLAFile(path) as mla: + assert sorted(mla.keys()) == sorted(list(FILES.keys())) + for name, data in FILES.items(): + assert mla[name] == data + +def test_context_write_error(): + "Raise an error during the context write __exit__" + with pytest.raises(mla.WrongArchiveWriterState): + with MLAFile(tempfile.mkstemp(suffix=".mla")[1], "w") as archive: + # INTENTIONALLY BUGGY + # .finalize will be called twice, causing an exception + archive.finalize() + +def test_context_write_error_in_with(): + "Raise an error in the with statement, it must be re-raised" + CustomException = type("CustomException", (Exception,), {}) + with pytest.raises(CustomException): + with MLAFile(tempfile.mkstemp(suffix=".mla")[1], "w") as mla: + # INTENTIONALLY BUGGY + raise CustomException + +def test_writer_config_layers(): + "Test writer config creation for layers" + # Enable and disable layers + config = mla.WriterConfig() + assert config.layers == mla.LAYER_DEFAULT + + config = mla.WriterConfig(layers=mla.LAYER_COMPRESS) + assert config.layers == mla.LAYER_COMPRESS + + config.enable_layer(mla.LAYER_ENCRYPT) + assert config.layers == mla.LAYER_COMPRESS | mla.LAYER_ENCRYPT + + config.disable_layer(mla.LAYER_COMPRESS) + assert config.layers == mla.LAYER_ENCRYPT + +
config.disable_layer(mla.LAYER_ENCRYPT) + assert config.layers == mla.LAYER_EMPTY + + # Check for error on unknown layer (0xFF) + with pytest.raises(mla.BadAPIArgument): + config.enable_layer(0xFF) + + with pytest.raises(mla.BadAPIArgument): + config.disable_layer(0xFF) + + with pytest.raises(mla.BadAPIArgument): + config.set_layers(0xFF) + + with pytest.raises(mla.BadAPIArgument): + config = mla.WriterConfig(layers=0xFF) + + # Chaining + config = mla.WriterConfig().enable_layer( + mla.LAYER_COMPRESS + ).enable_layer( + mla.LAYER_ENCRYPT + ).disable_layer( + mla.LAYER_COMPRESS + ).set_layers( + mla.LAYER_COMPRESS + ) + assert config.layers == mla.LAYER_COMPRESS + +def test_writer_config_compression(): + "Test compression API in WriterConfig creation" + config = mla.WriterConfig() + with pytest.raises(OverflowError): + config.with_compression_level(-1) + with pytest.raises(mla.ConfigError): + config.with_compression_level(0xFF) + + # Value + config.with_compression_level(mla.DEFAULT_COMPRESSION_LEVEL) + assert config.compression_level == mla.DEFAULT_COMPRESSION_LEVEL + config.with_compression_level(1) + assert config.compression_level == 1 + + # Chaining + out = config.with_compression_level(mla.DEFAULT_COMPRESSION_LEVEL) + assert out is config + +# Expected: mla/bindings/python/tests/ +MLA_BASE_PATH = os.path.dirname(  # four dirname() calls: go up four levels from this file + os.path.dirname( + os.path.dirname( + os.path.dirname( + __file__ + ) + ) + ) +) +SAMPLE_PATH = os.path.join(MLA_BASE_PATH, "samples") + +def test_public_keys(): + "Test the PublicKeys object" + # Bad parsing + with pytest.raises(mla.InvalidECCKeyFormat): + mla.PublicKeys(b"NOT A KEY") + + with pytest.raises(FileNotFoundError): + mla.PublicKeys("/tmp/does_not_exists") + + # Open a PEM key, through path + pkeys_pem = mla.PublicKeys(os.path.join(SAMPLE_PATH, "test_ed25519_pub.pem")) + assert len(pkeys_pem.keys) == 1 + + # Open a DER key, through path + pkeys_der = mla.PublicKeys(os.path.join(SAMPLE_PATH, "test_ed25519_pub.der")) + assert
len(pkeys_pem.keys) == 1 + + # Keys must be the same + assert pkeys_pem.keys == pkeys_der.keys + + # Open a PEM key, through data + pkeys_pem = mla.PublicKeys(open(os.path.join(SAMPLE_PATH, "test_ed25519_pub.pem"), "rb").read()) + assert len(pkeys_pem.keys) == 1 + + # Open a DER key, through data + pkeys_pem = mla.PublicKeys(open(os.path.join(SAMPLE_PATH, "test_ed25519_pub.der"), "rb").read()) + assert len(pkeys_pem.keys) == 1 + + # Keys must be the same + assert pkeys_pem.keys == pkeys_der.keys + + # Open several keys, using both path and data + pkeys = mla.PublicKeys( + os.path.join(SAMPLE_PATH, "test_ed25519_pub.pem"), + open(os.path.join(SAMPLE_PATH, "test_x25519_2_pub.pem"), "rb").read() + ) + assert len(pkeys.keys) == 2 + +def test_private_keys(): + "Test the PrivateKeys object" + # Bad parsing + with pytest.raises(mla.InvalidECCKeyFormat): + mla.PrivateKeys(b"NOT A KEY") + + with pytest.raises(mla.InvalidECCKeyFormat): + # This is a public key, not a private one + mla.PrivateKeys(os.path.join(SAMPLE_PATH, "test_ed25519_pub.pem")) + + with pytest.raises(FileNotFoundError): + mla.PrivateKeys("/tmp/does_not_exists") + + # Open a PEM key, through path + pkeys_pem = mla.PrivateKeys(os.path.join(SAMPLE_PATH, "test_ed25519.pem")) + assert len(pkeys_pem.keys) == 1 + + # Open a DER key, through path + pkeys_der = mla.PrivateKeys(os.path.join(SAMPLE_PATH, "test_ed25519.der")) + assert len(pkeys_pem.keys) == 1 + + # Keys must be the same + assert pkeys_pem.keys == pkeys_der.keys + + # Open a PEM key, through data + pkeys_pem = mla.PrivateKeys(open(os.path.join(SAMPLE_PATH, "test_ed25519.pem"), "rb").read()) + assert len(pkeys_pem.keys) == 1 + + # Open a DER key, through data + pkeys_pem = mla.PrivateKeys(open(os.path.join(SAMPLE_PATH, "test_ed25519.der"), "rb").read()) + assert len(pkeys_pem.keys) == 1 + + # Keys must be the same + assert pkeys_pem.keys == pkeys_der.keys + + # Open several keys, using both path and data + pkeys = mla.PrivateKeys( + 
os.path.join(SAMPLE_PATH, "test_ed25519.pem"), + open(os.path.join(SAMPLE_PATH, "test_x25519_2.pem"), "rb").read() + ) + assert len(pkeys.keys) == 2 + +def test_writer_config_public_keys(): + "Test public keys API in WriterConfig creation" + + # Test API call + config = mla.WriterConfig() + with pytest.raises(mla.InvalidECCKeyFormat): + config.set_public_keys(mla.PublicKeys(b"NOT A KEY"))  # NOTE(review): the PublicKeys(...) argument is evaluated first, so the error presumably comes from it, not from set_public_keys + + # Test shortcut on object build + config = mla.WriterConfig( + public_keys=mla.PublicKeys(os.path.join(SAMPLE_PATH, "test_ed25519_pub.pem")) + ) + # Test the getter + assert len(config.public_keys.keys) == 1 + + # Chaining + out = config.set_public_keys(mla.PublicKeys( + os.path.join(SAMPLE_PATH, "test_ed25519_pub.pem"), + open(os.path.join(SAMPLE_PATH, "test_x25519_2_pub.pem"), "rb").read() + )) + assert out is config + assert len(config.public_keys.keys) == 2 + +def test_mlafile_bad_config(): + "Try to create a MLAFile with the wrong config parameter" + with pytest.raises(TypeError): + MLAFile(tempfile.mkstemp(suffix=".mla")[1], "w", config="NOT A CONFIG") + + with pytest.raises(TypeError): + MLAFile(tempfile.mkstemp(suffix=".mla")[1], "w", config=mla.ReaderConfig()) + + with pytest.raises(TypeError): + MLAFile(tempfile.mkstemp(suffix=".mla")[1], "r", config=mla.WriterConfig()) + + +def test_reader_config_api(): + "Test the ReaderConfig API" + # Set private keys: through the setter, then through the constructor + config = mla.ReaderConfig() + assert config.private_keys is None + + config.set_private_keys( + mla.PrivateKeys(os.path.join(SAMPLE_PATH, "test_ed25519.pem")) + ) + assert len(config.private_keys.keys) == 1 + + config = mla.ReaderConfig(private_keys=mla.PrivateKeys(os.path.join(SAMPLE_PATH, "test_ed25519.pem"))) + assert len(config.private_keys.keys) == 1 + + # Chaining + config = mla.ReaderConfig() + out = config.set_private_keys( + mla.PrivateKeys(os.path.join(SAMPLE_PATH, "test_ed25519.pem")), + ) + assert out is config + +def test_write_then_read_encrypted(): + "Create an encrypted archive, then read
it" + # Create the archive + path = tempfile.mkstemp(suffix=".mla")[1] + with MLAFile(path, "w", config=mla.WriterConfig( + public_keys=mla.PublicKeys(os.path.join(SAMPLE_PATH, "test_ed25519_pub.pem")), + layers=mla.LAYER_ENCRYPT + )) as archive: + for name, data in FILES.items(): + archive[name] = data + + # Read the archive + with MLAFile(path, config=mla.ReaderConfig( + private_keys=mla.PrivateKeys(os.path.join(SAMPLE_PATH, "test_ed25519.pem")) + )) as archive: + assert sorted(archive.keys()) == sorted(list(FILES.keys())) + for name, data in FILES.items(): + assert archive[name] == data + +def test_read_encrypted_archive_bad_key(): + "Try to read an encrypted archive with a bad key" + # Create the archive + path = tempfile.mkstemp(suffix=".mla")[1] + with MLAFile(path, "w", config=mla.WriterConfig( + public_keys=mla.PublicKeys(os.path.join(SAMPLE_PATH, "test_ed25519_pub.pem")), + layers=mla.LAYER_ENCRYPT + )) as archive: + for name, data in FILES.items(): + archive[name] = data + + # Try to read without a key + with pytest.raises(mla.PrivateKeyNeeded): + with MLAFile(path) as archive: + pass + + # Try to read with an incorrect key (mla.ConfigError: PrivateKeyNotFound) + with pytest.raises(mla.ConfigError): + with MLAFile(path, config=mla.ReaderConfig( + private_keys=mla.PrivateKeys(os.path.join(SAMPLE_PATH, "test_x25519_2.pem")) + )) as archive: + pass + +def test_write_file_to_str(basic_archive): + """Test archive.write_file_to(), using the String output version""" + # Temporary directory for extraction + tmpdir = tempfile.mkdtemp() + with MLAFile(basic_archive) as archive: + # Extract all files using the String output version + for name in archive.keys(): + archive.write_file_to(name, os.path.join(tmpdir, name)) + + # Check the files + for name, data in FILES.items(): + assert open(os.path.join(tmpdir, name), "rb").read() == data + +def test_write_file_to_file(basic_archive): + """Test archive.write_file_to(), using the File output version""" + # Temporary
directory for extraction + tmpdir = tempfile.mkdtemp() + with MLAFile(basic_archive) as archive: + # Extract all files using the File output version + for name in archive.keys(): + with open(os.path.join(tmpdir, name), "wb") as f: + archive.write_file_to(name, f) + + # Check the files + for name, data in FILES.items(): + assert open(os.path.join(tmpdir, name), "rb").read() == data + + +class BytesIOCounter(io.BytesIO): + """ + Extend BytesIO to count the number of calls to `write` and `read` + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.write_count = 0 + self.read_count = 0 + + def write(self, *args, **kwargs): + self.write_count += 1 + return super().write(*args, **kwargs) + + def read(self, *args, **kwargs): + self.read_count += 1 + return super().read(*args, **kwargs) + + +def test_write_file_to_file_chunk_size(basic_archive): + """Test archive.write_file_to() on a file object, checking the number of `write` calls per chunk size""" + with MLAFile(basic_archive) as archive: + # Chunk size set to 1 -> expect 5 calls + output = BytesIOCounter() + archive.write_file_to("file1", output, chunk_size=1) + + # Check the number of calls + assert output.write_count == len(FILES["file1"]) + output.seek(0) + assert output.read() == FILES["file1"] + + # Chunk size set to 2 -> expect 3 calls + output = BytesIOCounter() + archive.write_file_to("file1", output, chunk_size=2) + + # Check the number of calls + assert output.write_count == len(FILES["file1"]) // 2 + 1 + output.seek(0) + assert output.read() == FILES["file1"] + +def test_add_file_from_str(): + "Test archive.add_file_from(), using the String input version" + # Create the archive + path = tempfile.mkstemp(suffix=".mla")[1] + with MLAFile(path, "w") as archive: + for name, data in FILES.items(): + # Create a file on disk to import + fname = tempfile.mkstemp()[1] + with open(fname, "wb") as f: + f.write(data) + # Import the file + archive.add_file_from(name, fname) + + # Read the archive + with MLAFile(path) as
archive: + assert sorted(archive.keys()) == sorted(list(FILES.keys())) + for name, data in FILES.items(): + assert archive[name] == data + +def test_add_file_from_io(): + "Test archive.add_file_from(), using the IO input version" + # Create the archive + path = tempfile.mkstemp(suffix=".mla")[1] + with MLAFile(path, "w") as archive: + for name, data in FILES.items(): + # Use a buffered IO + f = io.BytesIO(data) + # Import the data + archive.add_file_from(name, f) + + # Read the archive + with MLAFile(path) as archive: + assert sorted(archive.keys()) == sorted(list(FILES.keys())) + for name, data in FILES.items(): + assert archive[name] == data + +def test_add_file_from_io_chunk_size(): + "Test archive.add_file_from() on an IO object, checking the number of `read` calls per chunk size" + for chunk_size in [1, 2]: + # Create the archive + path = tempfile.mkstemp(suffix=".mla")[1] + data = FILES["file1"] + with MLAFile(path, "w") as archive: + src = BytesIOCounter(data) + archive.add_file_from("file1", src, chunk_size=chunk_size) + + # Check the number of calls + if chunk_size == 1: + # Chunk size set to 1 -> expect 6 calls (5 with data, 1 empty) + assert src.read_count == len(data) + 1 + elif chunk_size == 2: + # Chunk size set to 2 -> expect 4 calls (3 with data, 1 empty) + assert src.read_count == 4 + + # Read the archive + with MLAFile(path) as archive: + assert archive["file1"] == data + + \ No newline at end of file diff --git a/mla/src/lib.rs b/mla/src/lib.rs index a5c4bdb9..bb7a3eb8 100644 --- a/mla/src/lib.rs +++ b/mla/src/lib.rs @@ -478,7 +478,6 @@ impl<'a, W: InnerWriterTrait> ArchiveWriter<'a, W> { ArchiveHeader { format_version: MLA_FORMAT_VERSION, config: config.to_persistent()?, - // TODO public_key hashes for easier decryption } .dump(&mut dest)?;