Skip to content

Commit

Permalink
fix: use caches to prevent repeated lookups
Browse files Browse the repository at this point in the history
  • Loading branch information
ctron authored and Bob McWhirter committed Apr 16, 2024
1 parent c6547d1 commit 3f0719b
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 54 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ target
/data
.trustify

/flamegraph.svg
/perf.data
/flamegraph*.svg
/perf.data*
67 changes: 59 additions & 8 deletions modules/graph/src/graph/sbom/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@ use sea_orm::{
QueryTrait, RelationTrait, Select, Set,
};
use sea_query::{Condition, Func, JoinType, Query, SimpleExpr};
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::fmt::{Debug, Formatter};
use std::rc::Rc;
use std::sync::Arc;
use time::OffsetDateTime;
use tracing::instrument;
use trustify_common::cpe::Cpe;
Expand Down Expand Up @@ -439,13 +442,15 @@ impl SbomContext {
/// Within the context of *this* SBOM, ingest a relationship between
/// two packages.
#[instrument(skip(tx), err)]
async fn ingest_package_relates_to_package<TX: AsRef<Transactional>>(
&self,
async fn ingest_package_relates_to_package<'a, TX: AsRef<Transactional>>(
&'a self,
cache: &mut PackageCache<'a>,
left_package_input: Purl,
relationship: Relationship,
right_package_input: Purl,
tx: TX,
) -> Result<(), Error> {
/*
let left_package = self
.graph
.ingest_qualified_package(left_package_input.clone(), &tx)
Expand All @@ -454,9 +459,12 @@ impl SbomContext {
let right_package = self
.graph
.ingest_qualified_package(right_package_input.clone(), &tx)
.await;
.await;*/

let left_package = cache.lookup(&left_package_input).await;
let right_package = cache.lookup(&right_package_input).await;

match (&left_package, &right_package) {
match (&*left_package, &*right_package) {
(Ok(left_package), Ok(right_package)) => {
if entity::package_relates_to_package::Entity::find()
.filter(entity::package_relates_to_package::Column::SbomId.eq(self.sbom.id))
Expand Down Expand Up @@ -488,20 +496,20 @@ impl SbomContext {
(Err(_), Err(_)) => {
log::warn!(
"unable to ingest relationships between non-fully-qualified packages {}, {}",
left_package_input.to_string(),
right_package_input.to_string()
left_package_input,
right_package_input,
);
}
(Err(_), Ok(_)) => {
log::warn!(
"unable to ingest relationships involving a non-fully-qualified package {}",
left_package_input.to_string()
left_package_input
);
}
(Ok(_), Err(_)) => {
log::warn!(
"unable to ingest relationships involving a non-fully-qualified package {}",
right_package_input.to_string()
right_package_input
);
}
}
Expand Down Expand Up @@ -702,3 +710,46 @@ impl SbomContext {
*/
}

pub struct PackageCache<'a> {
cache: HashMap<Purl, Rc<Result<QualifiedPackageContext<'a>, Error>>>,
graph: &'a Graph,
tx: &'a Transactional,
hits: usize,
}

impl<'a> Debug for PackageCache<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("PackageCache")
.field("cache", &self.cache.len())
.field("hits", &self.hits)
.finish()
}
}

impl<'a> PackageCache<'a> {
pub fn new(capacity: usize, graph: &'a Graph, tx: &'a Transactional) -> Self {
Self {
cache: HashMap::with_capacity(capacity),
graph,
tx,
hits: 0,
}
}

pub async fn lookup(&mut self, purl: &Purl) -> Rc<Result<QualifiedPackageContext<'a>, Error>> {
match self.cache.entry(purl.clone()) {
Entry::Occupied(entry) => {
self.hits += 1;
entry.get().clone()
}
Entry::Vacant(entry) => {
let result = self
.graph
.ingest_qualified_package(purl.clone(), &self.tx)
.await;
entry.insert(Rc::new(result)).clone()
}
}
}
}
113 changes: 69 additions & 44 deletions modules/graph/src/graph/sbom/spdx/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
mod tests;

use crate::graph::error::Error;
use crate::graph::sbom::{SbomContext, SbomInformation};
use crate::graph::sbom::{PackageCache, SbomContext, SbomInformation};
use sea_orm::TransactionTrait;
use serde_json::Value;
use spdx_rs::models::{RelationshipType, SPDX};
use std::collections::HashMap;
use std::io::Read;
use time::OffsetDateTime;
use tracing::instrument;
Expand Down Expand Up @@ -39,73 +40,97 @@ impl SbomContext {
sbom_data: SPDX,
tx: TX,
) -> Result<(), anyhow::Error> {
// create a lookup cache for id -> package information
let mut cache = HashMap::with_capacity(sbom_data.package_information.len());

for pi in &sbom_data.package_information {
cache.insert(&pi.package_spdx_identifier, pi);
}

// For each thing described in the SBOM data, link it up to an sbom_cpe or sbom_package.
for described in &sbom_data.document_creation_information.document_describes {
for described_package in sbom_data
.package_information
.iter()
.filter(|each| each.package_spdx_identifier.eq(described))
{
for reference in &described_package.external_reference {
if reference.reference_type == "purl" {
//log::debug!("describes pkg {}", reference.reference_locator);
let Some(described_package) = cache.get(described) else {
continue;
};

for reference in &described_package.external_reference {
match reference.reference_type.as_str() {
"purl" => {
self.ingest_describes_package(
reference.reference_locator.as_str().try_into()?,
&tx,
)
.await?;
} else if reference.reference_type == "cpe22Type" {
//log::debug!("describes cpe22 {}", reference.reference_locator);
}
"cpe22Type" => {
if let Ok(cpe) = cpe::uri::Uri::parse(&reference.reference_locator) {
self.ingest_describes_cpe22(cpe, &tx).await?;
}
}
_ => {}
}
}
}

let mut lookup_cache = PackageCache::new(
sbom_data.package_information.len(),
&self.graph,
tx.as_ref(),
);

// connect all other tree-ish package trees in the context of this sbom.
for package_info in &sbom_data.package_information {
let package_identifier = &package_info.package_spdx_identifier;
for package_ref in &package_info.external_reference {
if package_ref.reference_type == "purl" {
let package_a = package_ref.reference_locator.clone();
//log::debug!("pkg_a: {}", package_a);

for relationship in sbom_data.relationships_for_spdx_id(package_identifier) {
if let Some(package) = sbom_data.package_information.iter().find(|each| {
each.package_spdx_identifier == relationship.related_spdx_element
}) {
for reference in &package.external_reference {
if reference.reference_type == "purl" {
let package_b = reference.reference_locator.clone();

// Check for the degenerate case that seems to appear where an SBOM inceptions itself.
if package_a != package_b {
if let Ok((left, rel, right)) = SpdxRelationship(
&package_a,
&relationship.relationship_type,
&package_b,
)
.try_into()
{
self.ingest_package_relates_to_package(
left.try_into()?,
rel,
right.try_into()?,
&tx,
)
.await?
}
}
}
}
if package_ref.reference_type != "purl" {
continue;
}

let package_a = &package_ref.reference_locator;
//log::debug!("pkg_a: {}", package_a);

'rels: for relationship in
sbom_data.relationships_for_spdx_id(&package_info.package_spdx_identifier)
{
let Some(package) = cache.get(&relationship.related_spdx_element) else {
continue 'rels;
};

'refs: for reference in &package.external_reference {
if reference.reference_type != "purl" {
continue 'refs;
}

let package_b = &reference.reference_locator;

// Check for the degenerate case that seems to appear where an SBOM inceptions itself.
if package_a == package_b {
continue 'refs;
}

// check if we have a valid relationship
let Ok((left, rel, right)) =
SpdxRelationship(package_a, &relationship.relationship_type, package_b)
.try_into()
else {
continue 'refs;
};

// now add it
self.ingest_package_relates_to_package(
&mut lookup_cache,
left.try_into()?,
rel,
right.try_into()?,
&tx,
)
.await?
}
}
}
}

log::info!("Package cache: {lookup_cache:?}");

Ok(())
}
}
Expand Down
18 changes: 18 additions & 0 deletions modules/graph/src/graph/sbom/tests.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#![cfg(test)]

use crate::graph::sbom::PackageCache;
use crate::graph::Graph;
use std::collections::HashMap;
use std::convert::TryInto;
use test_log::test;
use trustify_common::db::{Database, Transactional};
Expand Down Expand Up @@ -198,6 +200,8 @@ async fn transitive_dependency_of() -> Result<(), anyhow::Error> {
let db = Database::for_test("transitive_dependency_of").await?;
let system = Graph::new(db);

let mut cache = PackageCache::new(1, &system, &Transactional::None);

let sbom1 = system
.ingest_sbom(
"http://sbomsRus.gov/thing1.json",
Expand All @@ -210,6 +214,7 @@ async fn transitive_dependency_of() -> Result<(), anyhow::Error> {

sbom1
.ingest_package_relates_to_package(
&mut cache,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Relationship::DependencyOf,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Expand All @@ -219,6 +224,7 @@ async fn transitive_dependency_of() -> Result<(), anyhow::Error> {

sbom1
.ingest_package_relates_to_package(
&mut cache,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Relationship::DependencyOf,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Expand All @@ -228,6 +234,7 @@ async fn transitive_dependency_of() -> Result<(), anyhow::Error> {

sbom1
.ingest_package_relates_to_package(
&mut cache,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Relationship::DependencyOf,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Expand All @@ -237,6 +244,7 @@ async fn transitive_dependency_of() -> Result<(), anyhow::Error> {

sbom1
.ingest_package_relates_to_package(
&mut cache,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Relationship::DependencyOf,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Expand All @@ -246,6 +254,7 @@ async fn transitive_dependency_of() -> Result<(), anyhow::Error> {

sbom1
.ingest_package_relates_to_package(
&mut cache,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Relationship::DependencyOf,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Expand All @@ -269,6 +278,8 @@ async fn ingest_package_relates_to_package_dependency_of() -> Result<(), anyhow:
let db = Database::for_test("ingest_contains_packages").await?;
let system = Graph::new(db);

let mut cache = PackageCache::new(1, &system, &Transactional::None);

let sbom1 = system
.ingest_sbom(
"http://sbomsRus.gov/thing1.json",
Expand All @@ -281,6 +292,7 @@ async fn ingest_package_relates_to_package_dependency_of() -> Result<(), anyhow:

sbom1
.ingest_package_relates_to_package(
&mut cache,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Relationship::DependencyOf,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Expand All @@ -300,6 +312,7 @@ async fn ingest_package_relates_to_package_dependency_of() -> Result<(), anyhow:

sbom2
.ingest_package_relates_to_package(
&mut cache,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Relationship::DependencyOf,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Expand Down Expand Up @@ -345,6 +358,8 @@ async fn sbom_vulnerabilities() -> Result<(), anyhow::Error> {
let db = Database::for_test("sbom_vulnerabilities").await?;
let system = Graph::new(db);

let mut cache = PackageCache::new(1, &system, &Transactional::None);

println!("{:?}", system);

let sbom = system
Expand All @@ -364,6 +379,7 @@ async fn sbom_vulnerabilities() -> Result<(), anyhow::Error> {
println!("-------------------- B");

sbom.ingest_package_relates_to_package(
&mut cache,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Relationship::DependencyOf,
"pkg://oci/[email protected]".try_into()?,
Expand All @@ -373,6 +389,7 @@ async fn sbom_vulnerabilities() -> Result<(), anyhow::Error> {
println!("-------------------- C");

sbom.ingest_package_relates_to_package(
&mut cache,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Relationship::DependencyOf,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Expand All @@ -382,6 +399,7 @@ async fn sbom_vulnerabilities() -> Result<(), anyhow::Error> {
println!("-------------------- D");

sbom.ingest_package_relates_to_package(
&mut cache,
"pkg://maven/postgres/[email protected]".try_into()?,
Relationship::DependencyOf,
"pkg://maven/io.quarkus/[email protected]".try_into()?,
Expand Down

0 comments on commit 3f0719b

Please sign in to comment.