From 3f0719b4bcae5f10561a27256bff80abb9466a2e Mon Sep 17 00:00:00 2001 From: Jens Reimann Date: Tue, 16 Apr 2024 17:30:44 +0200 Subject: [PATCH] fix: use caches to prevent repeated lookups --- .gitignore | 4 +- modules/graph/src/graph/sbom/mod.rs | 67 ++++++++++++-- modules/graph/src/graph/sbom/spdx/mod.rs | 113 ++++++++++++++--------- modules/graph/src/graph/sbom/tests.rs | 18 ++++ 4 files changed, 148 insertions(+), 54 deletions(-) diff --git a/.gitignore b/.gitignore index 5fa68235c..f4c03c999 100644 --- a/.gitignore +++ b/.gitignore @@ -4,5 +4,5 @@ target /data .trustify -/flamegraph.svg -/perf.data +/flamegraph*.svg +/perf.data* diff --git a/modules/graph/src/graph/sbom/mod.rs b/modules/graph/src/graph/sbom/mod.rs index 96d982781..51cf1840b 100644 --- a/modules/graph/src/graph/sbom/mod.rs +++ b/modules/graph/src/graph/sbom/mod.rs @@ -11,8 +11,11 @@ use sea_orm::{ QueryTrait, RelationTrait, Select, Set, }; use sea_query::{Condition, Func, JoinType, Query, SimpleExpr}; +use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::fmt::{Debug, Formatter}; +use std::rc::Rc; +use std::sync::Arc; use time::OffsetDateTime; use tracing::instrument; use trustify_common::cpe::Cpe; @@ -439,13 +442,15 @@ impl SbomContext { /// Within the context of *this* SBOM, ingest a relationship between /// two packages. #[instrument(skip(tx), err)] - async fn ingest_package_relates_to_package>( - &self, + async fn ingest_package_relates_to_package<'a, TX: AsRef>( + &'a self, + cache: &mut PackageCache<'a>, left_package_input: Purl, relationship: Relationship, right_package_input: Purl, tx: TX, ) -> Result<(), Error> { + /* let left_package = self .graph .ingest_qualified_package(left_package_input.clone(), &tx) @@ -454,9 +459,12 @@ impl SbomContext { let right_package = self .graph .ingest_qualified_package(right_package_input.clone(), &tx) - .await; + .await;*/ + + let left_package = cache.lookup(&left_package_input).await; + let right_package = cache.lookup(&right_package_input).await; - match (&left_package, &right_package) { + match (&*left_package, &*right_package) { (Ok(left_package), Ok(right_package)) => { if entity::package_relates_to_package::Entity::find() .filter(entity::package_relates_to_package::Column::SbomId.eq(self.sbom.id)) @@ -488,20 +496,20 @@ impl SbomContext { (Err(_), Err(_)) => { log::warn!( "unable to ingest relationships between non-fully-qualified packages {}, {}", - left_package_input.to_string(), - right_package_input.to_string() + left_package_input, + right_package_input, ); } (Err(_), Ok(_)) => { log::warn!( "unable to ingest relationships involving a non-fully-qualified package {}", - left_package_input.to_string() + left_package_input ); } (Ok(_), Err(_)) => { log::warn!( "unable to ingest relationships involving a non-fully-qualified package {}", - right_package_input.to_string() + right_package_input ); } } @@ -702,3 +710,46 @@ impl SbomContext { */ } + +pub struct PackageCache<'a> { + cache: HashMap, Error>>>, + graph: &'a Graph, + tx: &'a Transactional, + hits: usize, +} + +impl<'a> Debug for PackageCache<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PackageCache") + .field("cache", &self.cache.len()) + .field("hits", &self.hits) + .finish() + } +} + +impl<'a> PackageCache<'a> { + pub fn new(capacity: usize, graph: &'a Graph, tx: &'a Transactional) -> Self { + Self { + cache: HashMap::with_capacity(capacity), + graph, + tx, + hits: 0, + } + } + + pub async fn lookup(&mut self, purl: &Purl) -> Rc, Error>> { + match self.cache.entry(purl.clone()) { + Entry::Occupied(entry) => { + self.hits += 1; + entry.get().clone() + } + Entry::Vacant(entry) => { + let result = self + .graph + .ingest_qualified_package(purl.clone(), &self.tx) + .await; + entry.insert(Rc::new(result)).clone() + } + } + } +} diff --git a/modules/graph/src/graph/sbom/spdx/mod.rs b/modules/graph/src/graph/sbom/spdx/mod.rs index 00fc910bb..af8721389 100644 --- a/modules/graph/src/graph/sbom/spdx/mod.rs +++ b/modules/graph/src/graph/sbom/spdx/mod.rs @@ -1,10 +1,11 @@ mod tests; use crate::graph::error::Error; -use crate::graph::sbom::{SbomContext, SbomInformation}; +use crate::graph::sbom::{PackageCache, SbomContext, SbomInformation}; use sea_orm::TransactionTrait; use serde_json::Value; use spdx_rs::models::{RelationshipType, SPDX}; +use std::collections::HashMap; use std::io::Read; use time::OffsetDateTime; use tracing::instrument; @@ -39,73 +40,97 @@ impl SbomContext { sbom_data: SPDX, tx: TX, ) -> Result<(), anyhow::Error> { + // create a lookup cache for id -> package information + let mut cache = HashMap::with_capacity(sbom_data.package_information.len()); + + for pi in &sbom_data.package_information { + cache.insert(&pi.package_spdx_identifier, pi); + } + // For each thing described in the SBOM data, link it up to an sbom_cpe or sbom_package. for described in &sbom_data.document_creation_information.document_describes { - for described_package in sbom_data - .package_information - .iter() - .filter(|each| each.package_spdx_identifier.eq(described)) - { - for reference in &described_package.external_reference { - if reference.reference_type == "purl" { - //log::debug!("describes pkg {}", reference.reference_locator); + let Some(described_package) = cache.get(described) else { + continue; + }; + + for reference in &described_package.external_reference { + match reference.reference_type.as_str() { + "purl" => { self.ingest_describes_package( reference.reference_locator.as_str().try_into()?, &tx, ) .await?; - } else if reference.reference_type == "cpe22Type" { - //log::debug!("describes cpe22 {}", reference.reference_locator); + } + "cpe22Type" => { if let Ok(cpe) = cpe::uri::Uri::parse(&reference.reference_locator) { self.ingest_describes_cpe22(cpe, &tx).await?; } } + _ => {} } } } + let mut lookup_cache = PackageCache::new( + sbom_data.package_information.len(), + &self.graph, + tx.as_ref(), + ); + // connect all other tree-ish package trees in the context of this sbom. for package_info in &sbom_data.package_information { - let package_identifier = &package_info.package_spdx_identifier; for package_ref in &package_info.external_reference { - if package_ref.reference_type == "purl" { - let package_a = package_ref.reference_locator.clone(); - //log::debug!("pkg_a: {}", package_a); - - for relationship in sbom_data.relationships_for_spdx_id(package_identifier) { - if let Some(package) = sbom_data.package_information.iter().find(|each| { - each.package_spdx_identifier == relationship.related_spdx_element - }) { - for reference in &package.external_reference { - if reference.reference_type == "purl" { - let package_b = reference.reference_locator.clone(); - - // Check for the degenerate case that seems to appear where an SBOM inceptions itself. - if package_a != package_b { - if let Ok((left, rel, right)) = SpdxRelationship( - &package_a, - &relationship.relationship_type, - &package_b, - ) - .try_into() - { - self.ingest_package_relates_to_package( - left.try_into()?, - rel, - right.try_into()?, - &tx, - ) - .await? - } - } - } - } + if package_ref.reference_type != "purl" { + continue; + } + + let package_a = &package_ref.reference_locator; + //log::debug!("pkg_a: {}", package_a); + + 'rels: for relationship in + sbom_data.relationships_for_spdx_id(&package_info.package_spdx_identifier) + { + let Some(package) = cache.get(&relationship.related_spdx_element) else { + continue 'rels; + }; + + 'refs: for reference in &package.external_reference { + if reference.reference_type != "purl" { + continue 'refs; + } + + let package_b = &reference.reference_locator; + + // Check for the degenerate case that seems to appear where an SBOM inceptions itself. + if package_a == package_b { + continue 'refs; } + + // check if we have a valid relationship + let Ok((left, rel, right)) = + SpdxRelationship(package_a, &relationship.relationship_type, package_b) + .try_into() + else { + continue 'refs; + }; + + // now add it + self.ingest_package_relates_to_package( + &mut lookup_cache, + left.try_into()?, + rel, + right.try_into()?, + &tx, + ) + .await? } } } } + log::info!("Package cache: {lookup_cache:?}"); + Ok(()) } } diff --git a/modules/graph/src/graph/sbom/tests.rs b/modules/graph/src/graph/sbom/tests.rs index c5ab21bbe..0d537598f 100644 --- a/modules/graph/src/graph/sbom/tests.rs +++ b/modules/graph/src/graph/sbom/tests.rs @@ -1,6 +1,8 @@ #![cfg(test)] +use crate::graph::sbom::PackageCache; use crate::graph::Graph; +use std::collections::HashMap; use std::convert::TryInto; use test_log::test; use trustify_common::db::{Database, Transactional}; @@ -198,6 +200,8 @@ async fn transitive_dependency_of() -> Result<(), anyhow::Error> { let db = Database::for_test("transitive_dependency_of").await?; let system = Graph::new(db); + let mut cache = PackageCache::new(1, &system, &Transactional::None); + let sbom1 = system .ingest_sbom( "http://sbomsRus.gov/thing1.json", @@ -210,6 +214,7 @@ async fn transitive_dependency_of() -> Result<(), anyhow::Error> { sbom1 .ingest_package_relates_to_package( + &mut cache, "pkg://maven/io.quarkus/transitive-b@1.2.3".try_into()?, Relationship::DependencyOf, "pkg://maven/io.quarkus/transitive-a@1.2.3".try_into()?, @@ -219,6 +224,7 @@ async fn transitive_dependency_of() -> Result<(), anyhow::Error> { sbom1 .ingest_package_relates_to_package( + &mut cache, "pkg://maven/io.quarkus/transitive-c@1.2.3".try_into()?, Relationship::DependencyOf, "pkg://maven/io.quarkus/transitive-b@1.2.3".try_into()?, @@ -228,6 +234,7 @@ async fn transitive_dependency_of() -> Result<(), anyhow::Error> { sbom1 .ingest_package_relates_to_package( + &mut cache, "pkg://maven/io.quarkus/transitive-d@1.2.3".try_into()?, Relationship::DependencyOf, "pkg://maven/io.quarkus/transitive-c@1.2.3".try_into()?, @@ -237,6 +244,7 @@ async fn transitive_dependency_of() -> Result<(), anyhow::Error> { sbom1 .ingest_package_relates_to_package( + &mut cache, "pkg://maven/io.quarkus/transitive-e@1.2.3".try_into()?, Relationship::DependencyOf, "pkg://maven/io.quarkus/transitive-c@1.2.3".try_into()?, @@ -246,6 +254,7 @@ async fn transitive_dependency_of() -> Result<(), anyhow::Error> { sbom1 .ingest_package_relates_to_package( + &mut cache, "pkg://maven/io.quarkus/transitive-d@1.2.3".try_into()?, Relationship::DependencyOf, "pkg://maven/io.quarkus/transitive-b@1.2.3".try_into()?, @@ -269,6 +278,8 @@ async fn ingest_package_relates_to_package_dependency_of() -> Result<(), anyhow: let db = Database::for_test("ingest_contains_packages").await?; let system = Graph::new(db); + let mut cache = PackageCache::new(1, &system, &Transactional::None); + let sbom1 = system .ingest_sbom( "http://sbomsRus.gov/thing1.json", @@ -281,6 +292,7 @@ async fn ingest_package_relates_to_package_dependency_of() -> Result<(), anyhow: sbom1 .ingest_package_relates_to_package( + &mut cache, "pkg://maven/io.quarkus/quarkus-postgres@1.2.3".try_into()?, Relationship::DependencyOf, "pkg://maven/io.quarkus/quarkus-core@1.2.3".try_into()?, @@ -300,6 +312,7 @@ async fn ingest_package_relates_to_package_dependency_of() -> Result<(), anyhow: sbom2 .ingest_package_relates_to_package( + &mut cache, "pkg://maven/io.quarkus/quarkus-sqlite@1.2.3".try_into()?, Relationship::DependencyOf, "pkg://maven/io.quarkus/quarkus-core@1.2.3".try_into()?, @@ -345,6 +358,8 @@ async fn sbom_vulnerabilities() -> Result<(), anyhow::Error> { let db = Database::for_test("sbom_vulnerabilities").await?; let system = Graph::new(db); + let mut cache = PackageCache::new(1, &system, &Transactional::None); + println!("{:?}", system); let sbom = system @@ -364,6 +379,7 @@ async fn sbom_vulnerabilities() -> Result<(), anyhow::Error> { println!("-------------------- B"); sbom.ingest_package_relates_to_package( + &mut cache, "pkg://maven/io.quarkus/quarkus-core@1.2.3".try_into()?, Relationship::DependencyOf, "pkg://oci/my-app@1.2.3".try_into()?, @@ -373,6 +389,7 @@ async fn sbom_vulnerabilities() -> Result<(), anyhow::Error> { println!("-------------------- C"); sbom.ingest_package_relates_to_package( + &mut cache, "pkg://maven/io.quarkus/quarkus-postgres@1.2.3".try_into()?, Relationship::DependencyOf, "pkg://maven/io.quarkus/quarkus-core@1.2.3".try_into()?, @@ -382,6 +399,7 @@ async fn sbom_vulnerabilities() -> Result<(), anyhow::Error> { println!("-------------------- D"); sbom.ingest_package_relates_to_package( + &mut cache, "pkg://maven/postgres/postgres-driver@1.2.3".try_into()?, Relationship::DependencyOf, "pkg://maven/io.quarkus/quarkus-postgres@1.2.3".try_into()?,