Skip to content

Commit

Permalink
feat: allow ignoring missing (404) files when importing
Browse files Browse the repository at this point in the history
  • Loading branch information
ctron committed Oct 31, 2024
1 parent 1164a4f commit 3e51716
Show file tree
Hide file tree
Showing 17 changed files with 149 additions and 75 deletions.
20 changes: 10 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ clap = "4"
concat-idents = "1"
cpe = "0.1.5"
csaf = { version = "0.5.0", default-features = false }
csaf-walker = { version = "0.9.0", default-features = false }
csaf-walker = { version = "0.10.0-alpha.1", default-features = false }
cve = "0.3.1"
cyclonedx-bom = "0.7.0"
env_logger = "0.11.0"
Expand Down Expand Up @@ -108,7 +108,7 @@ ring = "0.17.8"
roxmltree = "0.20.0"
rstest = "0.23.0"
rust-s3 = "0.35"
sbom-walker = { version = "0.9.0", default-features = false, features = ["crypto-openssl", "cyclonedx-bom", "spdx-rs"] }
sbom-walker = { version = "0.10.0-alpha.1", default-features = false, features = ["crypto-openssl", "cyclonedx-bom", "spdx-rs"] }
schemars = "0.8"
sea-orm = { version = "~1.0", features = ["debug-print"] } # See https://www.sea-ql.org/blog/2024-08-04-sea-orm-1.0/#release-planning
sea-orm-migration = "~1.0"
Expand Down Expand Up @@ -147,8 +147,8 @@ utoipa-redoc = { version = "5.0.0", features = ["actix-web"] }
utoipa-swagger-ui = "8.0.3"
uuid = "1.7.0"
walkdir = "2.5"
walker-common = "0.9.3"
walker-extras = "0.9.0"
walker-common = "0.10.0-alpha.1"
walker-extras = "0.10.0-alpha.1"
zip = "2.2.0"

trustify-auth = { path = "common/auth", features = ["actix", "swagger"] }
Expand Down
1 change: 1 addition & 0 deletions common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pub mod package;
pub mod purl;
pub mod reqwest;
pub mod sbom;
pub mod serde;
pub mod time;
pub mod tls;
pub mod uuid;
4 changes: 4 additions & 0 deletions common/src/serde.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/// Check if a value is equal to its type's [`Default`] value.
///
/// Intended for use with serde's `skip_serializing_if`, so that fields still
/// holding their default value are omitted from the serialized output.
///
/// Only [`PartialEq`] is required (rather than `Eq`), so the helper also works
/// for types such as `f64`. Every type that is `Eq` is also `PartialEq`, so
/// all existing callers remain valid.
///
/// Returns `true` when `value` compares equal to `D::default()`.
pub fn is_default<D: Default + PartialEq>(value: &D) -> bool {
    *value == D::default()
}
4 changes: 4 additions & 0 deletions modules/importer/src/model/csaf.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use super::*;
use trustify_common::serde::is_default;

#[derive(
Clone,
Expand All @@ -24,6 +25,9 @@ pub struct CsafImporter {

#[serde(default, skip_serializing_if = "Option::is_none")]
pub fetch_retries: Option<usize>,

#[serde(default, skip_serializing_if = "is_default")]
pub ignore_missing: bool,
}

impl Deref for CsafImporter {
Expand Down
6 changes: 5 additions & 1 deletion modules/importer/src/model/sbom.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use super::*;
use trustify_common::model::BinaryByteSize;
use trustify_common::{model::BinaryByteSize, serde::is_default};

#[derive(
Clone,
Expand Down Expand Up @@ -31,6 +31,9 @@ pub struct SbomImporter {

#[serde(default, skip_serializing_if = "Option::is_none")]
pub fetch_retries: Option<usize>,

#[serde(default, skip_serializing_if = "is_default")]
pub ignore_missing: bool,
}

impl Deref for SbomImporter {
Expand Down Expand Up @@ -78,6 +81,7 @@ mod test {
only_patterns: vec![],
size_limit: Some(bytesize::ByteSize::mib(1234).into()),
fetch_retries: None,
ignore_missing: false,
}
);

Expand Down
11 changes: 10 additions & 1 deletion modules/importer/src/runner/csaf/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ use csaf_walker::{
walker::Walker,
};
use parking_lot::Mutex;
use reqwest::StatusCode;
use std::collections::HashSet;
use std::{sync::Arc, time::SystemTime};
use tracing::instrument;
use trustify_module_ingestor::{graph::Graph, service::IngestorService};
Expand All @@ -46,6 +48,7 @@ impl super::ImportRunner {
v3_signatures,
only_patterns,
fetch_retries,
ignore_missing,
} = importer;

let report = Arc::new(Mutex::new(ReportBuilder::new()));
Expand All @@ -71,7 +74,13 @@ impl super::ImportRunner {

// wrap storage with report

let storage = CsafReportVisitor(ReportVisitor::new(report.clone(), storage));
let storage = CsafReportVisitor {
next: ReportVisitor::new(report.clone(), storage),
ignore_errors: match ignore_missing {
true => HashSet::from_iter([StatusCode::NOT_FOUND]),
false => HashSet::new(),
},
};

// validate (called by retriever)

Expand Down
57 changes: 34 additions & 23 deletions modules/importer/src/runner/csaf/report.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,59 +5,70 @@ use crate::runner::{
report::{Phase, ReportVisitor},
};
use csaf_walker::{
retrieve::RetrievalError,
source::{HttpSource, HttpSourceError},
validation::{ValidatedAdvisory, ValidatedVisitor, ValidationContext, ValidationError},
};
use reqwest::StatusCode;
use std::collections::HashSet;
use trustify_module_ingestor::service;
use walker_common::utils::url::Urlify;
use walker_common::{fetcher, retrieve::RetrievalError, utils::url::Urlify};

pub struct CsafReportVisitor<C: RunContext>(pub ReportVisitor<StorageVisitor<C>>);
/// Validated-advisory visitor that forwards to the storage visitor and records
/// failures in the import report.
pub struct CsafReportVisitor<C: RunContext> {
/// The report-carrying visitor wrapping the storage visitor; advisories are
/// delegated to it and errors are recorded through its report.
pub next: ReportVisitor<StorageVisitor<C>>,
/// HTTP status codes for which a failed document retrieval is still recorded
/// in the report, but then treated as success instead of being returned as an
/// error.
pub ignore_errors: HashSet<StatusCode>,
}

impl<C: RunContext> ValidatedVisitor for CsafReportVisitor<C> {
type Error = <StorageVisitor<C> as ValidatedVisitor>::Error;
type Context = <StorageVisitor<C> as ValidatedVisitor>::Context;
impl<C: RunContext> ValidatedVisitor<HttpSource> for CsafReportVisitor<C> {
type Error = <StorageVisitor<C> as ValidatedVisitor<HttpSource>>::Error;
type Context = <StorageVisitor<C> as ValidatedVisitor<HttpSource>>::Context;

async fn visit_context(
&self,
context: &ValidationContext<'_>,
) -> Result<Self::Context, Self::Error> {
self.0.next.visit_context(context).await
self.next.next.visit_context(context).await
}

async fn visit_advisory(
&self,
context: &Self::Context,
result: Result<ValidatedAdvisory, ValidationError>,
result: Result<ValidatedAdvisory, ValidationError<HttpSource>>,
) -> Result<(), Self::Error> {
let file = result.url().to_string();

self.0.report.lock().tick();
self.next.report.lock().tick();

let result = self.0.next.visit_advisory(context, result).await;
let result = self.next.next.visit_advisory(context, result).await;

if let Err(err) = &result {
match err {
StorageError::Validation(ValidationError::Retrieval(
RetrievalError::InvalidResponse { code, .. },
)) => {
self.0.report.lock().add_error(
StorageError::Validation(ValidationError::Retrieval(err)) => {
self.next.report.lock().add_error(
Phase::Retrieval,
file,
format!("retrieval of document failed: {code}"),
format!("retrieval of document failed: {err}"),
);

if code.is_client_error() {
// If it's a client error, there's no need to re-try. We simply claim
// success after we logged it.
return Ok(());
// handle client error as non-retry error

if let RetrievalError::Source {
err: HttpSourceError::Fetcher(fetcher::Error::Request(err)),
discovered: _,
} = err
{
if let Some(status) = err.status() {
if self.ignore_errors.contains(&status) {
return Ok(());
}
}
}
}
StorageError::Validation(ValidationError::DigestMismatch {
expected,
actual,
..
}) => {
self.0.report.lock().add_error(
self.next.report.lock().add_error(
Phase::Validation,
file,
format!("digest mismatch - expected: {expected}, actual: {actual}"),
Expand All @@ -68,7 +79,7 @@ impl<C: RunContext> ValidatedVisitor for CsafReportVisitor<C> {
return Ok(());
}
StorageError::Validation(ValidationError::Signature { error, .. }) => {
self.0.report.lock().add_error(
self.next.report.lock().add_error(
Phase::Validation,
file,
format!("unable to verify signature: {error}"),
Expand All @@ -79,7 +90,7 @@ impl<C: RunContext> ValidatedVisitor for CsafReportVisitor<C> {
return Ok(());
}
StorageError::Processing(err) => {
self.0.report.lock().add_error(
self.next.report.lock().add_error(
Phase::Upload,
file,
format!("processing failed: {err}"),
Expand All @@ -90,7 +101,7 @@ impl<C: RunContext> ValidatedVisitor for CsafReportVisitor<C> {
return Ok(());
}
StorageError::Storage(err) => {
self.0.report.lock().add_error(
self.next.report.lock().add_error(
Phase::Upload,
file,
format!("upload failed: {err}"),
Expand Down
11 changes: 6 additions & 5 deletions modules/importer/src/runner/csaf/storage.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::runner::{common::storage::StorageError, context::RunContext, report::ReportBuilder};
use csaf_walker::validation::{
ValidatedAdvisory, ValidatedVisitor, ValidationContext, ValidationError,
use csaf_walker::{
source::Source,
validation::{ValidatedAdvisory, ValidatedVisitor, ValidationContext, ValidationError},
};
use parking_lot::Mutex;
use std::sync::Arc;
Expand All @@ -16,8 +17,8 @@ pub struct StorageVisitor<C: RunContext> {
pub labels: Labels,
}

impl<C: RunContext> ValidatedVisitor for StorageVisitor<C> {
type Error = StorageError<ValidationError>;
impl<C: RunContext, S: Source> ValidatedVisitor<S> for StorageVisitor<C> {
type Error = StorageError<ValidationError<S>>;
type Context = ();

async fn visit_context(&self, _: &ValidationContext<'_>) -> Result<Self::Context, Self::Error> {
Expand All @@ -27,7 +28,7 @@ impl<C: RunContext> ValidatedVisitor for StorageVisitor<C> {
async fn visit_advisory(
&self,
_context: &Self::Context,
result: Result<ValidatedAdvisory, ValidationError>,
result: Result<ValidatedAdvisory, ValidationError<S>>,
) -> Result<(), Self::Error> {
let doc = result?;
let location = doc.context.url().to_string();
Expand Down
11 changes: 10 additions & 1 deletion modules/importer/src/runner/sbom/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ use crate::{
server::context::WalkerProgress,
};
use parking_lot::Mutex;
use reqwest::StatusCode;
use sbom_walker::{
retrieve::RetrievingVisitor,
source::{HttpOptions, HttpSource},
validation::ValidationVisitor,
walker::Walker,
};
use std::collections::HashSet;
use std::{sync::Arc, time::SystemTime};
use tracing::instrument;
use trustify_module_ingestor::{graph::Graph, service::IngestorService};
Expand Down Expand Up @@ -49,6 +51,7 @@ impl super::ImportRunner {
only_patterns,
size_limit,
fetch_retries,
ignore_missing,
} = importer;

let url = Url::parse(&source).map_err(|err| ScannerError::Critical(err.into()))?;
Expand All @@ -74,7 +77,13 @@ impl super::ImportRunner {

// wrap storage with report

let storage = SbomReportVisitor(ReportVisitor::new(report.clone(), storage));
let storage = SbomReportVisitor {
next: ReportVisitor::new(report.clone(), storage),
ignore_errors: match ignore_missing {
true => HashSet::from_iter([StatusCode::NOT_FOUND]),
false => HashSet::new(),
},
};

// validate (called by retriever)

Expand Down
Loading

0 comments on commit 3e51716

Please sign in to comment.