Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Fixes TC-1179

Signed-off-by: Jim Crossley <[email protected]>
  • Loading branch information
jcrossley3 committed Apr 19, 2024
1 parent 9ddf983 commit 6dc8f13
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 27 deletions.
24 changes: 24 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file added bombastic/testdata/bigjunk.bz2
Binary file not shown.
20 changes: 20 additions & 0 deletions bombastic/testdata/bigjunk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Generate ``bigjunk.bz2``: a highly compressible bzip2 file.

The payload is a long run of ``{`` bytes, a short middle fragment, and an
equally long run of ``}`` bytes.  Run as a script, it writes
``./bigjunk.bz2`` with the default sizes and prints the number of
uncompressed bytes written.
"""
import bz2

# Number of chunks written on EACH side of the middle fragment.
CHUNKS = 1_000_000 // 2
# Uncompressed size of a single chunk, in bytes.
CHUNKSIZE = 100_000
# Fragment written between the two runs.  It is reproduced exactly as-is;
# NOTE(review): the trailing quote makes it invalid JSON -- presumably
# deliberate for a "junk" fixture, confirm before changing.
MIDDLE = b'"a":1"'


def write_junk(path='./bigjunk.bz2', chunks=CHUNKS, chunk_size=CHUNKSIZE):
    """Write the junk payload to ``path`` and return its uncompressed size.

    :param path: destination of the bzip2 file.
    :param chunks: how many chunks of ``{`` (and, afterwards, of ``}``) to write.
    :param chunk_size: size of each chunk in bytes.
    :returns: total number of uncompressed bytes written, including the
        middle fragment.
    """
    opening = b'{' * chunk_size
    closing = b'}' * chunk_size
    size = 0

    with bz2.open(path, 'wb') as f:
        for _ in range(chunks):
            f.write(opening)
            size += chunk_size

        # Count the middle fragment too, so the reported size matches what
        # was actually written.
        f.write(MIDDLE)
        size += len(MIDDLE)

        for _ in range(chunks):
            f.write(closing)
            size += chunk_size

    return size


if __name__ == '__main__':
    # Guarded so that importing this module never triggers the (very large)
    # default write.
    size = write_junk()
    print(f'{size=}')
2 changes: 2 additions & 0 deletions integration-tests/src/bom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ fn bombastic_indexer() -> bombastic_indexer::Run {
access_key: Some("admin".into()),
secret_key: Some("password".into()),
validator: Validator::None,
max_size: ByteSize::gb(1),
},
bus: EventBusConfig {
event_bus: EventBusType::Kafka,
Expand Down Expand Up @@ -236,6 +237,7 @@ fn bombastic_api() -> bombastic_api::Run {
access_key: Some("admin".into()),
secret_key: Some("password".into()),
validator: Validator::SBOM,
max_size: ByteSize::gb(1),
},
infra: InfrastructureConfig {
infrastructure_enabled: false,
Expand Down
2 changes: 2 additions & 0 deletions integration-tests/src/vex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ fn vexination_indexer() -> vexination_indexer::Run {
access_key: Some("admin".into()),
secret_key: Some("password".into()),
validator: Validator::None,
max_size: ByteSize::gb(1),
},
infra: InfrastructureConfig {
infrastructure_enabled: false,
Expand Down Expand Up @@ -198,6 +199,7 @@ fn vexination_api() -> vexination_api::Run {
access_key: Some("admin".into()),
secret_key: Some("password".into()),
validator: Validator::VEX,
max_size: ByteSize::gb(1),
},
infra: InfrastructureConfig {
infrastructure_enabled: false,
Expand Down
2 changes: 2 additions & 0 deletions storage/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ prometheus = "0.13.3"
bombastic-model = { path = "../bombastic/model" }
csaf = "0.5.0"
hide = "0.1.1"
bytesize = "1"

[dev-dependencies]
rstest = "0.19"
test-log = { version = "0", features = ["env_logger", "trace"] }
12 changes: 11 additions & 1 deletion storage/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pub use key::*;

use async_stream::try_stream;
use bytes::Bytes;
use bytesize::ByteSize;
use futures::pin_mut;
use futures::{future::ok, stream::once, Stream, StreamExt};
use hide::Hide;
Expand All @@ -25,6 +26,7 @@ pub struct Storage {
bucket: Bucket,
metrics: Metrics,
validator: Validator,
max_size: ByteSize,
}

#[derive(Clone)]
Expand Down Expand Up @@ -141,6 +143,10 @@ pub struct StorageConfig {
/// Validation choice
#[arg(env = "VALIDATOR", long = "validator", default_value = "none")]
pub validator: Validator,

/// Maximum document size
#[arg(long, default_value_t = ByteSize::gb(1))]
pub max_size: ByteSize,
}

impl TryInto<Bucket> for StorageConfig {
Expand Down Expand Up @@ -222,6 +228,8 @@ pub enum Error {
InvalidKey(String),
#[error("invalid storage content")]
InvalidContent,
#[error("content exceeds max size: {0}")]
ExceedsMaxSize(ByteSize),
#[error("unexpected encoding {0}")]
Encoding(String),
#[error("Prometheus error {0}")]
Expand Down Expand Up @@ -292,11 +300,13 @@ pub struct Head {
impl Storage {
/// Creates a `Storage` from `config`, registering its metrics on `registry`.
///
/// # Errors
/// Returns an error if `config` cannot be converted into a bucket or if
/// `Metrics::register` fails.
pub fn new(config: StorageConfig, registry: &Registry) -> Result<Self, Error> {
    // Pull out the fields we keep before `try_into` consumes `config`.
    let (validator, max_size) = (config.validator.clone(), config.max_size);
    let bucket = config.try_into()?;
    let metrics = Metrics::register(registry)?;
    Ok(Self {
        bucket,
        metrics,
        validator,
        max_size,
    })
}

Expand Down Expand Up @@ -334,7 +344,7 @@ impl Storage {
);
let bucket = self.bucket.with_extra_headers(headers);

let data = self.validator.validate(encoding, Box::pin(data)).await?;
let data = self.validator.validate(self.max_size, encoding, Box::pin(data)).await?;
let mut rdr = stream::encoded_reader(DEFAULT_ENCODING, encoding, data)?;
let path = format!("{}{}", DATA_PATH, key);

Expand Down
97 changes: 71 additions & 26 deletions storage/src/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::{
};
use bombastic_model::prelude::SBOM as SBOMValidator;
use bytes::Bytes;
use bytesize::ByteSize;
use futures::{future::ok, pin_mut, stream::once, StreamExt};
use std::str::FromStr;

Expand Down Expand Up @@ -33,14 +34,15 @@ impl FromStr for Validator {
impl Validator {
pub async fn validate<'a>(
&self,
size: ByteSize,
encoding: Option<&str>,
data: ObjectStream<'a>,
) -> Result<ObjectStream<'a>, Error> {
use Validator::*;
match self {
None => Ok(data),
None => check(size, encoding, data, |_| Ok(())).await,
SBOM => {
check(encoding, data, |bytes| {
check(size, encoding, data, |bytes| {
SBOMValidator::parse(bytes).map_err(|e| {
log::error!("Invalid SBOM: {e}");
Error::InvalidContent
Expand All @@ -49,7 +51,7 @@ impl Validator {
.await
}
VEX => {
check(encoding, data, |bytes| {
check(size, encoding, data, |bytes| {
serde_json::from_slice::<csaf::Csaf>(bytes).map_err(|e| {
log::error!("Invalid VEX: {e}");
Error::InvalidContent
Expand All @@ -63,6 +65,7 @@ impl Validator {
}

async fn check<'a, T, F: Fn(&[u8]) -> Result<T, Error>>(
max: ByteSize,
encoding: Option<&str>,
data: ObjectStream<'a>,
parse: F,
Expand All @@ -71,7 +74,11 @@ async fn check<'a, T, F: Fn(&[u8]) -> Result<T, Error>>(
let mut bytes = vec![];
pin_mut!(data);
while let Some(chunk) = data.next().await {
bytes.extend_from_slice(&chunk?)
let slice = &chunk?;
if bytes.len() + slice.len() > max.0 as usize {
return Err(Error::ExceedsMaxSize(max));
}
bytes.extend_from_slice(slice)
}
parse(&bytes)?;
let s = once(ok(Bytes::copy_from_slice(&bytes)));
Expand All @@ -81,6 +88,7 @@ async fn check<'a, T, F: Fn(&[u8]) -> Result<T, Error>>(
#[cfg(test)]
mod tests {
use super::*;
use test_log::test;

async fn read(data: ObjectStream<'_>) -> Vec<u8> {
let mut bytes = vec![];
Expand All @@ -91,78 +99,115 @@ mod tests {
bytes
}

async fn test(v: Validator, enc: Option<&str>, expected: &[u8]) -> Result<Vec<u8>, Error> {
async fn test(v: Validator, max: ByteSize, enc: Option<&str>, expected: &[u8]) -> Result<Vec<u8>, Error> {
let src = once(ok(Bytes::copy_from_slice(expected)));
let sink = v.validate(enc, Box::pin(src)).await?;
let sink = v.validate(max, enc, Box::pin(src)).await?;
Ok(read(Box::pin(sink)).await)
}

#[tokio::test]
#[test(tokio::test)]
async fn none() -> Result<(), Error> {
let expected = include_bytes!("../../bombastic/testdata/ubi8-invalid.json");
let result = test(Validator::None, None, expected).await?;
let result = test(Validator::None, ByteSize::kb(100), None, expected).await?;
Ok(assert_eq!(expected[..], result[..]))
}

#[tokio::test]
#[test(tokio::test)]
async fn none_too_big() {
// Even non-validated docs are subject to max size
let expected = include_bytes!("../../bombastic/testdata/ubi8-invalid.json");
match test(Validator::None, ByteSize::b(100), None, expected).await.err() {
Some(Error::ExceedsMaxSize(_)) => (),
Some(e) => panic!("got {e} instead of ExceedsMaxSize"),
None => panic!("should've gotten ExceedsMaxSize"),
}
}

#[test(tokio::test)]
async fn sbom_json_valid() -> Result<(), Error> {
let expected = include_bytes!("../../bombastic/testdata/ubi8-valid.json");
let result = test(Validator::SBOM, None, expected).await?;
let result = test(Validator::SBOM, ByteSize::kb(100), None, expected).await?;
Ok(assert_eq!(expected[..], result[..]))
}

#[tokio::test]
#[test(tokio::test)]
async fn sbom_json_invalid() {
let expected = include_bytes!("../../bombastic/testdata/ubi8-invalid.json");
assert!(test(Validator::SBOM, None, expected).await.is_err())
assert!(test(Validator::SBOM, ByteSize::kb(100), None, expected).await.is_err())
}

#[tokio::test]
#[test(tokio::test)]
async fn sbom_bzip2_valid() -> Result<(), Error> {
let expected = include_bytes!("../../bombastic/testdata/ubi8-valid.json.bz2");
let result = test(Validator::SBOM, Some("bzip2"), expected).await?;
let result = test(Validator::SBOM, ByteSize::kb(100), Some("bzip2"), expected).await?;
// This exact file was obtained from a Red Hat internal
// repo. I think it's safe to ignore the 4-byte bz2 header, as
// it's the block-size (4th byte) that's different: 6 vs 9. I
// think we can chalk that up to different bzip2 encoders.
Ok(assert_eq!(expected[4..], result[4..]))
}

#[tokio::test]
#[test(tokio::test)]
async fn sbom_bzip2_invalid() {
let expected = include_bytes!("../../bombastic/testdata/ubi8-invalid.json.bz2");
assert!(test(Validator::SBOM, Some("bzip2"), expected).await.is_err())
assert!(test(Validator::SBOM, ByteSize::kb(100), Some("bzip2"), expected)
.await
.is_err())
}

#[tokio::test]
#[test(tokio::test)]
async fn sbom_bzip2_bigjunk() {
let expected = include_bytes!("../../bombastic/testdata/bigjunk.bz2");
match test(Validator::SBOM, ByteSize::kb(100), Some("bzip2"), expected)
.await
.err()
{
Some(Error::ExceedsMaxSize(_)) => (),
Some(e) => panic!("got {e} instead of ExceedsMaxSize"),
None => panic!("should've gotten ExceedsMaxSize"),
}
}

#[test(tokio::test)]
async fn sbom_bzip2_invalid_license() {
let expected = include_bytes!("../../bombastic/testdata/3amp-2.json.bz2");
assert!(test(Validator::SBOM, Some("bzip2"), expected).await.is_err())
match test(Validator::SBOM, ByteSize::gb(1), Some("bzip2"), expected)
.await
.err()
{
Some(Error::InvalidContent) => (),
Some(e) => panic!("got `{e}` instead of InvalidContent"),
None => panic!("should've gotten InvalidContent"),
}
}

#[tokio::test]
#[test(tokio::test)]
async fn sbom_zstd_valid() -> Result<(), Error> {
let expected = include_bytes!("../../bombastic/testdata/ubi8-valid.json.zst");
let result = test(Validator::SBOM, Some("zstd"), expected).await?;
let result = test(Validator::SBOM, ByteSize::kb(100), Some("zstd"), expected).await?;
Ok(assert_eq!(expected[..], result[..]))
}

#[tokio::test]
#[test(tokio::test)]
async fn sbom_zstd_invalid() {
let expected = include_bytes!("../../bombastic/testdata/ubi8-invalid.json.zst");
assert!(test(Validator::SBOM, Some("zstd"), expected).await.is_err())
assert!(test(Validator::SBOM, ByteSize::kb(100), Some("zstd"), expected)
.await
.is_err())
}

#[tokio::test]
#[test(tokio::test)]
async fn vex_json_valid() -> Result<(), Error> {
let expected = include_bytes!("../../vexination/testdata/rhsa-2023_1441.json");
let result = test(Validator::VEX, None, expected).await?;
let result = test(Validator::VEX, ByteSize::kb(100), None, expected).await?;
Ok(assert_eq!(expected[..], result[..]))
}

#[tokio::test]
#[test(tokio::test)]
async fn vex_json_invalid() {
let expected = include_bytes!("../../vexination/testdata/rhsa-2023_1441.json");
assert!(test(Validator::VEX, None, &expected[10..]).await.is_err())
assert!(test(Validator::VEX, ByteSize::kb(100), None, &expected[10..])
.await
.is_err())
}
}

0 comments on commit 6dc8f13

Please sign in to comment.