Skip to content

Commit

Permalink
feat(bombastic): index sha256 and spdx doc namespace
Browse files: browse the repository at this point in the history
  • Loading branch information
ctron committed Oct 24, 2023
1 parent 8eb0e28 commit 828b8c5
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 23 deletions.
14 changes: 14 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 10 additions & 9 deletions bombastic/index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,21 @@ version = "0.1.0"
edition = "2021"

[dependencies]
sikula = { version = "0.4.0", features = ["time"] }
zstd = "0.12"
tantivy = { version = "0.21.0", features = ["zstd-compression"] }
log = "0.4"
time = "0.3"
tar = "0.4"
trustification-index = { path = "../../index" }
trustification-api = { path = "../../api" }
bombastic-model = { path = "../model" }
cyclonedx-bom = "0.4.0"
spdx-rs = "0.5.5"
log = "0.4"
packageurl = "0.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.68"
sha256 = "1.4.0"
sikula = { version = "0.4.0", features = ["time"] }
spdx-rs = "0.5.5"
tantivy = { version = "0.21.0", features = ["zstd-compression"] }
tar = "0.4"
time = "0.3"
trustification-api = { path = "../../api" }
trustification-index = { path = "../../index" }
zstd = "0.12"

[dev-dependencies]
tokio = { version = "1", features = ["full"] }
Expand Down
53 changes: 41 additions & 12 deletions bombastic/index/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@ use cyclonedx_bom::models::{
use log::{debug, info, warn};
use sikula::{mir::Direction, prelude::*};
use spdx_rs::models::Algorithm;
use tantivy::query::{TermQuery, TermSetQuery};
use tantivy::{collector::TopDocs, Order};
use tantivy::{
query::{AllQuery, BooleanQuery},
collector::TopDocs,
query::{AllQuery, BooleanQuery, TermQuery, TermSetQuery},
schema::INDEXED,
store::ZstdCompressor,
DocAddress, IndexSettings, Searcher, SnippetGenerator,
DocAddress, IndexSettings, Order, Searcher, SnippetGenerator,
};
use time::{format_description::well_known::Rfc3339, OffsetDateTime};
use trustification_api::search::SearchOptions;
Expand Down Expand Up @@ -56,7 +55,12 @@ pub struct PackageFields {

struct Fields {
indexed_timestamp: Field,
/// the "storage id"
sbom_id: Field,
/// the unique ID of the SBOM
sbom_uid: Field,
/// the SHA256 sum of its content
sbom_sha256: Field,
sbom_created: Field,
sbom_creators: Field,
sbom_name: Field,
Expand All @@ -76,6 +80,8 @@ impl Index {
let fields = Fields {
indexed_timestamp: schema.add_date_field("indexed_timestamp", STORED),
sbom_id: schema.add_text_field("sbom_id", STRING | FAST | STORED),
sbom_uid: schema.add_text_field("sbom_uid", STRING | FAST | STORED),
sbom_sha256: schema.add_text_field("sbom_sha256", STRING | STORED),
sbom_created: schema.add_date_field("sbom_created", INDEXED | FAST | STORED),
sbom_creators: schema.add_text_field("sbom_creators", STRING | STORED),
sbom_name: schema.add_text_field("sbom_name", STRING | FAST | STORED),
Expand Down Expand Up @@ -126,6 +132,10 @@ impl Index {
let mut document = doc!();

document.add_text(self.fields.sbom_id, id);
document.add_text(
self.fields.sbom_uid,
&bom.document_creation_information.spdx_document_namespace,
);
document.add_text(self.fields.sbom_name, &bom.document_creation_information.document_name);
document.add_date(
self.fields.indexed_timestamp,
Expand Down Expand Up @@ -311,6 +321,10 @@ impl Index {
Term::from_field_text(self.fields.sbom_id, value),
Default::default(),
)),
Packages::Uid(value) => Box::new(TermQuery::new(
Term::from_field_text(self.fields.sbom_uid, value),
Default::default(),
)),
Packages::Package(primary) => boost(
self.create_string_query(
&[
Expand Down Expand Up @@ -413,17 +427,24 @@ impl Index {

impl trustification_index::Index for Index {
type MatchedDocument = SearchHit;
type Document = SBOM;
type Document = (SBOM, String);

fn index_doc(&self, id: &str, doc: &SBOM) -> Result<Document, SearchError> {
match doc {
SBOM::CycloneDX(bom) => self.index_cyclonedx(id, bom),
SBOM::SPDX(bom) => self.index_spdx(id, bom),
}
fn index_doc(&self, id: &str, (doc, sha256): &Self::Document) -> Result<Document, SearchError> {
let mut doc = match doc {
SBOM::CycloneDX(bom) => self.index_cyclonedx(id, bom)?,
SBOM::SPDX(bom) => self.index_spdx(id, bom)?,
};

doc.add_text(self.fields.sbom_sha256, sha256);

Ok(doc)
}

fn parse_doc(data: &[u8]) -> Result<SBOM, SearchError> {
SBOM::parse(data).map_err(|e| SearchError::DocParser(e.to_string()))
fn parse_doc(data: &[u8]) -> Result<Self::Document, SearchError> {
let sha256 = sha256::digest(data);
SBOM::parse(data)
.map_err(|e| SearchError::DocParser(e.to_string()))
.map(|doc| (doc, sha256))
}

fn schema(&self) -> Schema {
Expand Down Expand Up @@ -497,11 +518,17 @@ impl trustification_index::Index for Index {
) -> Result<Self::MatchedDocument, SearchError> {
let doc = searcher.doc(doc_address)?;
let id = field2str(&self.schema, &doc, self.fields.sbom_id)?;
let uid = field2str(&self.schema, &doc, self.fields.sbom_uid)?;
let name = field2str(&self.schema, &doc, self.fields.sbom_name)?;

let snippet_generator = SnippetGenerator::create(searcher, query, self.fields.sbom.desc)?;
let snippet = snippet_generator.snippet_from_doc(&doc).to_html();

let file_sha256 = doc
.get_first(self.fields.sbom_sha256)
.map(|s| s.as_text().unwrap_or(""))
.unwrap_or("");

let purl = doc
.get_first(self.fields.sbom.purl)
.map(|s| s.as_text().unwrap_or(""))
Expand Down Expand Up @@ -554,7 +581,9 @@ impl trustification_index::Index for Index {
let dependencies: u64 = doc.get_all(self.fields.dep.purl).count() as u64;
let document = SearchDocument {
id: id.to_string(),
uid: uid.to_string(),
version: version.to_string(),
file_sha256: file_sha256.to_string(),
purl,
cpe,
name: name.to_string(),
Expand Down
10 changes: 8 additions & 2 deletions bombastic/model/src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ use sikula::prelude::*;

#[derive(Clone, Debug, PartialEq, Search)]
pub enum Packages<'a> {
/// Search by SBOM id
/// Search by SBOM id (the storage ID)
#[search(default)]
Id(&'a str),
/// Search by SBOM uid (the actual ID)
Uid(&'a str),
/// Search package name and package reference.
///
/// Example queries:
Expand Down Expand Up @@ -53,8 +55,10 @@ pub enum Packages<'a> {
/// A document returned from the search index for every match.
#[derive(serde::Deserialize, serde::Serialize, Debug, PartialEq, utoipa::ToSchema)]
pub struct SearchDocument {
/// SBOM identifier
/// SBOM (storage) identifier
pub id: String,
/// SBOM unique identifier
pub uid: String,
/// SBOM package name
pub name: String,
/// SBOM package version
Expand All @@ -63,6 +67,8 @@ pub struct SearchDocument {
pub cpe: Option<String>,
/// SBOM package URL
pub purl: Option<String>,
/// SHA256 of the full file, as stored
pub file_sha256: String,
/// SBOM SHA256 digest
pub sha256: String,
/// SBOM license
Expand Down

0 comments on commit 828b8c5

Please sign in to comment.