From f9fd919ae3cf677c0f49cf06c536d95b17fae729 Mon Sep 17 00:00:00 2001 From: nscuro Date: Fri, 9 Aug 2024 19:37:28 +0200 Subject: [PATCH] Refactor meta analyzer to always fetch integrity data The API server no longer specifies whether only the latest version, only integrity metadata, or both should be fetched. Instead, we now always try to fetch both, relying on caching to prevent unnecessary repetitive calls to remote repositories. To further reduce remote calls, we now also cache cases where a given repository does not contain the analyzed component. Previously, only successful retrievals would be cached. Additionally, the MDC is now populated to include: * The PURL of the component being analyzed * The identifier of the repository being used Signed-off-by: nscuro --- .../v1/repo_meta_analysis.proto | 48 ++- .../RepositoryMetaAnalyzerTopology.java | 2 + .../repometaanalyzer/model/IntegrityMeta.java | 7 + .../repometaanalyzer/model/MetaModel.java | 18 +- .../processor/MetaAnalyzerProcessor.java | 377 +++++++++++------- .../repometaanalyzer/util/PurlUtil.java | 12 +- .../src/main/resources/application.properties | 2 + .../RepositoryMetaAnalyzerIT.java | 5 - .../RepositoryMetaAnalyzerTopologyTest.java | 39 +- .../processor/MetaAnalyzerProcessorTest.java | 70 +--- 10 files changed, 304 insertions(+), 276 deletions(-) diff --git a/proto/src/main/proto/org/dependencytrack/repometaanalysis/v1/repo_meta_analysis.proto b/proto/src/main/proto/org/dependencytrack/repometaanalysis/v1/repo_meta_analysis.proto index 02bd85dbb..3468435c7 100644 --- a/proto/src/main/proto/org/dependencytrack/repometaanalysis/v1/repo_meta_analysis.proto +++ b/proto/src/main/proto/org/dependencytrack/repometaanalysis/v1/repo_meta_analysis.proto @@ -1,6 +1,6 @@ syntax = "proto3"; -// Public API for Hyades repository meta analysis. +// Public API for DependencyTrack repository meta analysis. package org.dependencytrack.repometaanalysis.v1; import "google/protobuf/timestamp.proto"; @@ -11,14 +11,8 @@ option java_package = "org.dependencytrack.proto.repometaanalysis.v1"; message AnalysisCommand { // The component that shall be analyzed. Component component = 1; - FetchMeta fetch_meta = 2; -} -enum FetchMeta{ - FETCH_META_UNSPECIFIED = 0; - FETCH_META_INTEGRITY_DATA = 1; - FETCH_META_LATEST_VERSION = 2; - FETCH_META_INTEGRITY_DATA_AND_LATEST_VERSION = 3; + reserved 2; // fetch_meta; removed in 5.6.0 } message AnalysisResult { @@ -34,8 +28,33 @@ message AnalysisResult { // When the latest version was published. optional google.protobuf.Timestamp published = 4; - // Integrity metadata of the component. optional IntegrityMeta integrity_meta = 5; + + // When the latest version information was fetched from the repository. + optional google.protobuf.Timestamp fetched_at = 6; +} + +message IntegrityMeta { + // The MD5 hash of the component. + optional string md5 = 1; + + // The SHA1 hash of the component. + optional string sha1 = 2; + + // The SHA256 hash of the component. + optional string sha256 = 3; + + // The SHA512 hash of the component. + optional string sha512 = 4; + + // When the current version of the component was last modified. + optional google.protobuf.Timestamp current_version_last_modified = 5; + + // URL from where the information was sourced. + string meta_source_url = 6; + + // When the integrity metadata was fetched. 
+ google.protobuf.Timestamp fetched_at = 7; } message Component { @@ -47,14 +66,3 @@ message Component { optional bool internal = 2; optional string uuid = 3; } - -message IntegrityMeta { - optional string md5 = 1; - optional string sha1 = 2; - optional string sha256 = 3; - optional string sha512 = 4; - // When the component current version last modified. - optional google.protobuf.Timestamp current_version_last_modified = 5; - // Complete URL to fetch integrity metadata of the component. - optional string meta_source_url = 6; -} diff --git a/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/RepositoryMetaAnalyzerTopology.java b/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/RepositoryMetaAnalyzerTopology.java index 2f06f20ac..bc547d133 100644 --- a/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/RepositoryMetaAnalyzerTopology.java +++ b/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/RepositoryMetaAnalyzerTopology.java @@ -60,6 +60,8 @@ public Topology topology(final RepositoryAnalyzerFactory analyzerFactory, .withName(processorNameConsume(KafkaTopic.REPO_META_ANALYSIS_COMMAND))) .filter((key, scanCommand) -> scanCommand.hasComponent() && isValidPurl(scanCommand.getComponent().getPurl()), Named.as("filter_components_with_valid_purl")) + // TODO: This repartition is no longer required as of API server 5.6.0. + // Remove this in Hyades v0.7.0 and consume from REPO_META_ANALYSIS_COMMAND directly instead. // Re-key to PURL coordinates WITHOUT VERSION. As we are fetching data for packages, // but not specific package versions, including the version here would make our caching // largely ineffective. We want events for the same package to be sent to the same partition. 
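The re-keying described above boils down to reducing a PURL to its version-less coordinates, which is what PurlUtil.parsePurlCoordinatesWithoutVersion (touched later in this patch) does. A minimal sketch of that reduction, assuming the package-url-java library the analyzer already uses; the class and method names below are illustrative only:

import com.github.packageurl.MalformedPackageURLException;
import com.github.packageurl.PackageURL;

class PurlRekeySketch {

    // Keep only type, namespace and name; dropping version, qualifiers and subpath means
    // all versions of the same package map to the same record key, partition and cache entry.
    static PackageURL coordinatesWithoutVersion(final String purl) throws MalformedPackageURLException {
        final PackageURL parsed = new PackageURL(purl);
        return new PackageURL(parsed.getType(), parsed.getNamespace(), parsed.getName(), null, null, null);
    }

    public static void main(final String[] args) throws MalformedPackageURLException {
        // Prints: pkg:maven/com.fasterxml.jackson.core/jackson-databind
        System.out.println(coordinatesWithoutVersion(
                "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2").canonicalize());
    }
}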
diff --git a/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/model/IntegrityMeta.java b/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/model/IntegrityMeta.java index dabc8999f..28e6da8ec 100644 --- a/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/model/IntegrityMeta.java +++ b/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/model/IntegrityMeta.java @@ -35,6 +35,8 @@ public class IntegrityMeta implements Serializable { private String metaSourceUrl; + private final Date fetchedAt = new Date(); + public String getMd5() { return md5; } @@ -82,4 +84,9 @@ public String getMetaSourceUrl() { public void setMetaSourceUrl(String metaSourceUrl) { this.metaSourceUrl = metaSourceUrl; } + + public Date getFetchedAt() { + return fetchedAt; + } + } \ No newline at end of file diff --git a/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/model/MetaModel.java b/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/model/MetaModel.java index ccf8bfae4..63aaf9d0c 100644 --- a/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/model/MetaModel.java +++ b/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/model/MetaModel.java @@ -28,9 +28,12 @@ public class MetaModel implements Serializable { private Component component; private String latestVersion; private Date publishedTimestamp; + private String repositoryIdentifier; + private final Date fetchedAt = new Date(); - public MetaModel(){ + public MetaModel() { } + public MetaModel(final Component component) { this.component = component; } @@ -54,4 +57,17 @@ public Date getPublishedTimestamp() { public void setPublishedTimestamp(final Date publishedTimestamp) { this.publishedTimestamp = publishedTimestamp; } + + public String getRepositoryIdentifier() { + return repositoryIdentifier; + } + + public void setRepositoryIdentifier(final String repositoryIdentifier) { + this.repositoryIdentifier = repositoryIdentifier; + } + + public Date getFetchedAt() { + return fetchedAt; + } + } diff --git a/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/processor/MetaAnalyzerProcessor.java b/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/processor/MetaAnalyzerProcessor.java index bf936042a..eeb4c3056 100644 --- a/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/processor/MetaAnalyzerProcessor.java +++ b/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/processor/MetaAnalyzerProcessor.java @@ -19,19 +19,18 @@ package org.dependencytrack.repometaanalyzer.processor; import com.github.packageurl.PackageURL; -import com.google.protobuf.Timestamp; +import com.google.protobuf.util.Timestamps; import io.quarkus.cache.Cache; import io.quarkus.narayana.jta.QuarkusTransaction; import org.apache.kafka.streams.processor.api.ContextualFixedKeyProcessor; import org.apache.kafka.streams.processor.api.FixedKeyRecord; import org.dependencytrack.common.SecretDecryptor; -import org.dependencytrack.persistence.model.Component; import org.dependencytrack.persistence.model.Repository; import org.dependencytrack.persistence.model.RepositoryType; import org.dependencytrack.persistence.repository.RepoEntityRepository; import org.dependencytrack.proto.repometaanalysis.v1.AnalysisCommand; import org.dependencytrack.proto.repometaanalysis.v1.AnalysisResult; -import 
org.dependencytrack.proto.repometaanalysis.v1.FetchMeta; +import org.dependencytrack.proto.repometaanalysis.v1.Component; import org.dependencytrack.repometaanalyzer.model.IntegrityMeta; import org.dependencytrack.repometaanalyzer.model.MetaAnalyzerCacheKey; import org.dependencytrack.repometaanalyzer.model.MetaModel; @@ -39,27 +38,33 @@ import org.dependencytrack.repometaanalyzer.repositories.RepositoryAnalyzerFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.slf4j.MDC; import java.util.List; import java.util.NoSuchElementException; import java.util.Optional; -import static org.dependencytrack.repometaanalyzer.util.PurlUtil.parsePurlCoordinates; +import static org.dependencytrack.repometaanalyzer.util.PurlUtil.parsePurl; import static org.dependencytrack.repometaanalyzer.util.PurlUtil.parsePurlCoordinatesWithoutVersion; class MetaAnalyzerProcessor extends ContextualFixedKeyProcessor { private static final Logger LOGGER = LoggerFactory.getLogger(MetaAnalyzerProcessor.class); + private static final String MDC_COMPONENT_PURL = "componentPurl"; + private static final String MDC_REPOSITORY = "repository"; + private final RepoEntityRepository repoEntityRepository; private final RepositoryAnalyzerFactory analyzerFactory; private final SecretDecryptor secretDecryptor; private final Cache cache; - MetaAnalyzerProcessor(final RepoEntityRepository repoEntityRepository, - final RepositoryAnalyzerFactory analyzerFactory, - final SecretDecryptor secretDecryptor, - final Cache cache) { + MetaAnalyzerProcessor( + final RepoEntityRepository repoEntityRepository, + final RepositoryAnalyzerFactory analyzerFactory, + final SecretDecryptor secretDecryptor, + final Cache cache + ) { this.repoEntityRepository = repoEntityRepository; this.analyzerFactory = analyzerFactory; this.secretDecryptor = secretDecryptor; @@ -68,55 +73,58 @@ class MetaAnalyzerProcessor extends ContextualFixedKeyProcessor record) { - final var analysisCommand = record.value(); - final var component = analysisCommand.getComponent(); - // NOTE: Do not use purlWithoutVersion for the analysis! - // It only contains the type, namespace and name, but is missing the - // version and other qualifiers. Some analyzers require the version. 
- final PackageURL purl = parsePurlCoordinates(component.getPurl()); - - final Optional optionalAnalyzer = analyzerFactory.createAnalyzer(purl); - if (optionalAnalyzer.isEmpty()) { - LOGGER.debug("No analyzer is capable of analyzing {}", purl); + final AnalysisCommand analysisCommand = record.value(); + final Component component = analysisCommand.getComponent(); + final PackageURL purl = parsePurl(component.getPurl()); + + final Optional analyzer = analyzerFactory.createAnalyzer(purl); + if (analyzer.isEmpty()) { + if (LOGGER.isDebugEnabled()) { + LOGGER.debug("No analyzer is capable of analyzing {}", purl); + } + context().forward(record .withValue(AnalysisResult.newBuilder().setComponent(component).build()) .withTimestamp(context().currentSystemTimeMs())); return; } - final IMetaAnalyzer analyzer = optionalAnalyzer.get(); - var applicableRepositories = getApplicableRepositories(analyzer.supportedRepositoryType()); + final AnalysisResult result; + try (var ignoredMdcComponentPurl = MDC.putCloseable(MDC_COMPONENT_PURL, component.getPurl())) { + result = analyzeComponent(analyzer.get(), component); + } + + context().forward(record.withValue(result).withTimestamp(context().currentSystemTimeMs())); + } + + private AnalysisResult analyzeComponent(final IMetaAnalyzer analyzer, final Component component) { + final List applicableRepositories = getApplicableRepositories(analyzer.supportedRepositoryType()); + final Optional optionalRepoMeta = performRepoMetaAnalysis(applicableRepositories, analyzer, component); + final Optional optionalIntegrityMeta = performIntegrityMetaAnalysis(applicableRepositories, analyzer, component); - AnalysisResult.Builder resultBuilder = AnalysisResult.newBuilder() + final AnalysisResult.Builder resultBuilder = AnalysisResult.newBuilder() .setComponent(component); - if (analysisCommand.getFetchMeta().equals(FetchMeta.FETCH_META_LATEST_VERSION) - || analysisCommand.getFetchMeta().equals(FetchMeta.FETCH_META_INTEGRITY_DATA_AND_LATEST_VERSION)) { - resultBuilder = performRepoMeta(applicableRepositories, analyzer, analysisCommand, purl, resultBuilder); - if (analysisCommand.getFetchMeta().equals(FetchMeta.FETCH_META_LATEST_VERSION)) { - // forward result for only latest version - context().forward(record - .withValue(resultBuilder.build()) - .withTimestamp(context().currentSystemTimeMs())); - return; - } + if (optionalRepoMeta.isPresent()) { + final MetaModel repoMeta = optionalRepoMeta.get(); + resultBuilder.setLatestVersion(repoMeta.getLatestVersion()); + resultBuilder.setFetchedAt(Timestamps.fromDate(repoMeta.getFetchedAt())); + Optional.ofNullable(repoMeta.getRepositoryIdentifier()).ifPresent(resultBuilder::setRepository); + Optional.ofNullable(repoMeta.getPublishedTimestamp()) + .map(Timestamps::fromDate) + .ifPresent(resultBuilder::setPublished); + } else if (LOGGER.isDebugEnabled()) { + LOGGER.debug("No repository metadata found"); } - if (analysisCommand.getFetchMeta().equals(FetchMeta.FETCH_META_INTEGRITY_DATA) - || analysisCommand.getFetchMeta().equals(FetchMeta.FETCH_META_INTEGRITY_DATA_AND_LATEST_VERSION)) { - resultBuilder = performIntegrityMeta(applicableRepositories, analyzer, analysisCommand, purl, resultBuilder); - if (analysisCommand.getFetchMeta().equals(FetchMeta.FETCH_META_INTEGRITY_DATA)) { - // forward result for only integrity meta - context().forward(record - .withValue(resultBuilder.build()) - .withTimestamp(context().currentSystemTimeMs())); - return; - } + if (optionalIntegrityMeta.isPresent()) { + final IntegrityMeta integrityMeta = 
optionalIntegrityMeta.get(); + resultBuilder.setIntegrityMeta(convert(integrityMeta)); + } else if (LOGGER.isDebugEnabled()) { + LOGGER.debug("No integrity meta found"); } - // forward result for both latest version and integrity data OR even when no satisfactory results were yielded. - context().forward(record - .withValue(resultBuilder.build()) - .withTimestamp(context().currentSystemTimeMs())); + + return resultBuilder.build(); } private List getApplicableRepositories(final RepositoryType repositoryType) { @@ -129,69 +137,51 @@ private List getApplicableRepositories(final RepositoryType reposito // Quarkus has Hibernate L2 cache enabled by default, we just need to opt in to using // it for this query: https://quarkus.io/guides/hibernate-orm#caching-of-queries // Should be tested whether throughput can be improved this way. - //changed this to joinexisting because with new transaction, it is not fetching values that were inserted from - // existing previous transaction and returning empty result return QuarkusTransaction.joiningExisting() .call(() -> repoEntityRepository.findEnabledRepositoriesByType(repositoryType)); } - private Optional getCachedResult(final MetaAnalyzerCacheKey cacheKey) { - try { - final AnalysisResult cachedResult = cache.get(cacheKey, - key -> { - // null values would be cached, so throw an exception instead. - // See https://quarkus.io/guides/cache#let-exceptions-bubble-up - throw new NoSuchElementException(); - }).await().indefinitely(); - return Optional.of(cachedResult); - } catch (Exception e) { - return Optional.empty(); - } - } + private IntegrityMeta fetchIntegrityMeta( + final IMetaAnalyzer analyzer, + final Repository repository, + final Component component + ) { + configureAnalyzer(analyzer, repository); - private void cacheResult(final MetaAnalyzerCacheKey cacheKey, final AnalysisResult result) { - cache.get(cacheKey, key -> result).await().indefinitely(); - } + // Analyzers still work with "legacy" data models, + // allowing us to avoid major refactorings of the original code. 
+ final var metaComponent = new org.dependencytrack.persistence.model.Component(); + metaComponent.setPurl(component.getPurl()); - private IntegrityMeta fetchIntegrityMeta(IMetaAnalyzer analyzer, final Repository repository, final AnalysisCommand analysisCommand) { - configureAnalyzer(analyzer, repository); - final var component = new Component(); - component.setPurl(analysisCommand.getComponent().getPurl()); - LOGGER.debug("Performing integrity meta fetch on purl: {}", component.getPurl()); - IntegrityMeta integrityMeta; try { - integrityMeta = analyzer.getIntegrityMeta(component); - } catch (UnsupportedOperationException unsupportedPackageException) { - LOGGER.debug("Failed to analyze {} using {} with repository {} because package type is not supported", - component.getPurl(), analyzer.getName(), repository.getIdentifier(), unsupportedPackageException); + return analyzer.getIntegrityMeta(metaComponent); + } catch (UnsupportedOperationException e) { + LOGGER.debug("Package type is not supported"); return null; } catch (Exception e) { - LOGGER.error("Failed to analyze {} using {} with repository {}", - component.getPurl(), analyzer.getName(), repository.getIdentifier(), e); + LOGGER.error("Failed to fetch integrity metadata", e); return null; } - LOGGER.debug("Found integrity metadata for: {} using repository: {} ({})", - component.getPurl(), repository.getIdentifier(), repository.getType()); - return integrityMeta; } - private MetaModel fetchRepoMeta(IMetaAnalyzer analyzer, final Repository repository, final AnalysisCommand analysisCommand) { + private MetaModel fetchRepoMeta( + final IMetaAnalyzer analyzer, + final Repository repository, + final Component component + ) { configureAnalyzer(analyzer, repository); + // Analyzers still work with "legacy" data models, // allowing us to avoid major refactorings of the original code. 
- final var component = new Component(); - component.setPurl(analysisCommand.getComponent().getPurl()); - LOGGER.debug("Performing meta analysis on purl: {}", component.getPurl()); - MetaModel metaModel = null; + final var metaComponent = new org.dependencytrack.persistence.model.Component(); + metaComponent.setPurl(component.getPurl()); + try { - metaModel = analyzer.analyze(component); - LOGGER.debug("Found component metadata for: {} using repository: {} ({})", - component.getPurl(), repository.getIdentifier(), repository.getType()); + return analyzer.analyze(metaComponent); } catch (Exception e) { - LOGGER.error("Failed to analyze {} using {} with repository {}", - component.getPurl(), analyzer.getName(), repository.getIdentifier(), e); + LOGGER.error("Failed to fetch repository metadata", e); + return null; } - return metaModel; } private void configureAnalyzer(final IMetaAnalyzer analyzer, final Repository repository) { @@ -201,84 +191,161 @@ private void configureAnalyzer(final IMetaAnalyzer analyzer, final Repository re try { analyzer.setRepositoryUsernameAndPassword(repository.getUsername(), secretDecryptor.decryptAsString(repository.getPassword())); } catch (Exception e) { - LOGGER.error("Failed decrypting password for repository: " + repository.getIdentifier(), e); + LOGGER.error("Failed to decrypt password", e); } } } - private AnalysisResult.Builder performRepoMeta(List applicableRepositories, final IMetaAnalyzer analyzer, final AnalysisCommand analysisCommand, - final PackageURL componentPurl, AnalysisResult.Builder resultBuilder) { - final var component = analysisCommand.getComponent(); - for (Repository repository : applicableRepositories) { - if ((repository.isInternal() && !component.getInternal()) - || (!repository.isInternal() && component.getInternal())) { - // Internal components should only be analyzed using internal repositories. - // Non-internal components should only be analyzed with non-internal repositories. - // We do not want non-internal components being analyzed with internal repositories as - // internal repositories are not the source of truth for these components, even if the - // repository acts as a proxy to the source of truth. This cannot be assumed. 
- LOGGER.debug("Skipping component with purl {} ", component.getPurl()); - continue; + private Optional performRepoMetaAnalysis( + final List applicableRepositories, + final IMetaAnalyzer analyzer, + final Component component + ) { + for (final Repository repository : applicableRepositories) { + try (var ignoredMdcRepository = MDC.putCloseable(MDC_REPOSITORY, repository.getIdentifier())) { + final Optional optionalResult = + performRepoMetaAnalysisForRepository(analyzer, component, repository); + if (optionalResult.isPresent()) { + return optionalResult; + } } - resultBuilder.setRepository(repository.getIdentifier()); - var cacheKeyWithoutVersion = new MetaAnalyzerCacheKey(analyzer.getName(), parsePurlCoordinatesWithoutVersion(component.getPurl()).canonicalize(), repository.getUrl()); - var cachedResult = getCachedResult(cacheKeyWithoutVersion); - if (cachedResult.isPresent()) { - LOGGER.debug("Cache hit for latest version (analyzer: {}, purl: {}, repository: {})", analyzer.getName(), componentPurl, repository.getIdentifier()); - resultBuilder.setLatestVersion(cachedResult.get().getLatestVersion()); - resultBuilder.setPublished(cachedResult.get().getPublished()); - break; - } else { - LOGGER.debug("Cache miss for latest version (analyzer: {}, purl: {}, repository: {})", analyzer.getName(), componentPurl, repository.getIdentifier()); - final var repoMeta = fetchRepoMeta(analyzer, repository, analysisCommand); - if (repoMeta != null && repoMeta.getLatestVersion() != null && !repoMeta.getLatestVersion().isEmpty()) { - Optional.ofNullable(repoMeta.getLatestVersion()).ifPresent( - resultBuilder::setLatestVersion); - Optional.ofNullable(repoMeta.getPublishedTimestamp()).ifPresent( - version -> resultBuilder.setPublished(Timestamp.newBuilder() - .setSeconds(repoMeta.getPublishedTimestamp().getTime() / 1000))); - cacheResult(cacheKeyWithoutVersion, resultBuilder.build()); - break; + } + + return Optional.empty(); + } + + private Optional performRepoMetaAnalysisForRepository( + final IMetaAnalyzer analyzer, + final Component component, + final Repository repository + ) { + if ((repository.isInternal() && !component.getInternal()) + || (!repository.isInternal() && component.getInternal())) { + // Internal components should only be analyzed using internal repositories. + // Non-internal components should only be analyzed with non-internal repositories. + // We do not want non-internal components being analyzed with internal repositories as + // internal repositories are not the source of truth for these components, even if the + // repository acts as a proxy to the source of truth. This cannot be assumed. + LOGGER.debug("Skipping component with purl {} ", component.getPurl()); + return Optional.empty(); + } + + // NB: Cache key should only include type, namespace and name parts of the PURL, + // since latest version information will differ based on the input PURLs version + // or qualifiers. 
+ // + // For example: pkg:maven/foo/bar@1.2.3?type=jar + // would have the same latest version as: pkg:maven/foo/bar@3.2.1?type=pom + final String purlCoordinatesWithoutVersion = parsePurlCoordinatesWithoutVersion(component.getPurl()).canonicalize(); + final var cacheKey = new MetaAnalyzerCacheKey(analyzer.getName(), purlCoordinatesWithoutVersion, repository.getUrl()); + + final Optional> cacheEntry = getCachedResult(cacheKey); + if (cacheEntry.isPresent()) { + return cacheEntry.get(); + } + + final MetaModel repoMeta = fetchRepoMeta(analyzer, repository, component); + if (repoMeta == null || repoMeta.getLatestVersion() == null || repoMeta.getLatestVersion().isBlank()) { + cacheResult(cacheKey, null); + return Optional.empty(); + } + + repoMeta.setRepositoryIdentifier(repository.getIdentifier()); + cacheResult(cacheKey, repoMeta); + return Optional.of(repoMeta); + } + + private Optional performIntegrityMetaAnalysis( + final List applicableRepositories, + final IMetaAnalyzer analyzer, + final Component component + ) { + for (final Repository repository : applicableRepositories) { + try (var ignoredMdcRepository = MDC.putCloseable(MDC_REPOSITORY, repository.getIdentifier())) { + final Optional optionalResult = + performIntegrityMetaAnalysisForRepository(analyzer, component, repository); + if (optionalResult.isPresent()) { + return optionalResult; } } } - return resultBuilder; + + return Optional.empty(); + } + + private Optional performIntegrityMetaAnalysisForRepository( + final IMetaAnalyzer analyzer, + final Component component, + final Repository repository + ) { + if ((repository.isInternal() && !component.getInternal()) + || (!repository.isInternal() && component.getInternal())) { + return Optional.empty(); + } + + // NB: Cache key should include the entire PURL (including version and qualifiers), + // since integrity data can vary depending on certain qualifiers. + // + // For example: pkg:maven/foo/bar@1.2.3?type=jar + // refers to a different artifact than: pkg:maven/foo/bar@1.2.3?type=pom + // and thus will have different hashes, too. + final var cacheKey = new MetaAnalyzerCacheKey(analyzer.getName(), component.getPurl(), repository.getUrl()); + final Optional> cacheEntry = getCachedResult(cacheKey); + if (cacheEntry.isPresent()) { + return cacheEntry.get(); + } + + final IntegrityMeta integrityMeta = fetchIntegrityMeta(analyzer, repository, component); + if (integrityMeta == null) { + cacheResult(cacheKey, null); + return Optional.empty(); + } + + cacheResult(cacheKey, integrityMeta); + return Optional.of(integrityMeta); } - private AnalysisResult.Builder performIntegrityMeta(List applicableRepositories, final IMetaAnalyzer analyzer, final AnalysisCommand analysisCommand, - final PackageURL componentPurl, AnalysisResult.Builder resultBuilder) { - final var component = analysisCommand.getComponent(); - for (Repository repository : applicableRepositories) { - if ((repository.isInternal() && !component.getInternal()) - || (!repository.isInternal() && component.getInternal())) { - LOGGER.debug("Skipping component with purl {} ", component.getPurl()); - continue; + private Optional> getCachedResult(final MetaAnalyzerCacheKey cacheKey) { + try { + final T cachedValue = cache.get(cacheKey, + key -> { + // null values would be cached, so throw an exception instead. 
+ // See https://quarkus.io/guides/cache#let-exceptions-bubble-up + throw new NoSuchElementException(); + }).await().indefinitely(); + if (LOGGER.isDebugEnabled()) { + LOGGER.debug("Cache hit for {}", cachedValue); } - var cacheKeyWithVersion = new MetaAnalyzerCacheKey(analyzer.getName(), parsePurlCoordinates(component.getPurl()).canonicalize(), repository.getUrl()); - var cachedResult = getCachedResult(cacheKeyWithVersion); - if (cachedResult.isPresent()) { - LOGGER.debug("Cache hit for integrity meta (analyzer: {}, purl: {}, repository: {})", analyzer.getName(), componentPurl, repository.getIdentifier()); - resultBuilder.setIntegrityMeta(cachedResult.get().getIntegrityMeta()); - break; - } else { - LOGGER.debug("Cache miss for integrity meta (analyzer: {}, purl: {}, repository: {})", analyzer.getName(), componentPurl, repository.getIdentifier()); - var integrityMeta = fetchIntegrityMeta(analyzer, repository, analysisCommand); - if (integrityMeta != null) { - var metaBuilder = org.dependencytrack.proto.repometaanalysis.v1.IntegrityMeta.newBuilder(); - Optional.ofNullable(integrityMeta.getMd5()).ifPresent(metaBuilder::setMd5); - Optional.ofNullable(integrityMeta.getSha1()).ifPresent(metaBuilder::setSha1); - Optional.ofNullable(integrityMeta.getSha256()).ifPresent(metaBuilder::setSha256); - Optional.ofNullable(integrityMeta.getSha512()).ifPresent(metaBuilder::setSha512); - Optional.ofNullable(integrityMeta.getMetaSourceUrl()).ifPresent(metaBuilder::setMetaSourceUrl); - Optional.ofNullable(integrityMeta.getCurrentVersionLastModified()).ifPresent(date -> - metaBuilder.setCurrentVersionLastModified(Timestamp.newBuilder() - .setSeconds(date.getTime() / 1000))); - resultBuilder.setIntegrityMeta(metaBuilder); - cacheResult(cacheKeyWithVersion, resultBuilder.build()); - break; - } + + // cachedValue may be null. 
+ final Optional optionalValue = Optional.ofNullable(cachedValue); + + return Optional.of(optionalValue); + } catch (Exception e) { + if (LOGGER.isDebugEnabled()) { + LOGGER.debug("Cache miss for {}", cacheKey); } + + return Optional.empty(); } - return resultBuilder; } + + private void cacheResult(final MetaAnalyzerCacheKey cacheKey, final T result) { + cache.get(cacheKey, key -> result).await().indefinitely(); + } + + private static org.dependencytrack.proto.repometaanalysis.v1.IntegrityMeta convert(final IntegrityMeta integrityMeta) { + final var builder = org.dependencytrack.proto.repometaanalysis.v1.IntegrityMeta.newBuilder(); + builder.setFetchedAt(Timestamps.fromDate(integrityMeta.getFetchedAt())); + Optional.ofNullable(integrityMeta.getMd5()).ifPresent(builder::setMd5); + Optional.ofNullable(integrityMeta.getSha1()).ifPresent(builder::setSha1); + Optional.ofNullable(integrityMeta.getSha256()).ifPresent(builder::setSha256); + Optional.ofNullable(integrityMeta.getSha512()).ifPresent(builder::setSha512); + Optional.ofNullable(integrityMeta.getMetaSourceUrl()).ifPresent(builder::setMetaSourceUrl); + Optional.ofNullable(integrityMeta.getCurrentVersionLastModified()) + .map(Timestamps::fromDate) + .ifPresent(builder::setCurrentVersionLastModified); + return builder.build(); + } + } \ No newline at end of file diff --git a/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/util/PurlUtil.java b/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/util/PurlUtil.java index f21f97a85..f839cba9a 100644 --- a/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/util/PurlUtil.java +++ b/repository-meta-analyzer/src/main/java/org/dependencytrack/repometaanalyzer/util/PurlUtil.java @@ -30,14 +30,13 @@ private PurlUtil() { * @param purl the purl string to parse * @return a PackageURL object */ - public static PackageURL parsePurlCoordinates(final String purl) { + public static PackageURL parsePurl(final String purl) { try { return new PackageURL(purl); } catch (MalformedPackageURLException e) { throw new IllegalStateException(""" - The provided PURL is invalid, even though it should have been - validated in a previous processing step - """, e); + The provided PURL is invalid, even though it should have been \ + validated in a previous processing step""", e); } } @@ -52,9 +51,8 @@ public static PackageURL parsePurlCoordinatesWithoutVersion(final String purl) { parsedPurl.getName(), null, null, null); } catch (MalformedPackageURLException e) { throw new IllegalStateException(""" - The provided PURL is invalid, even though it should have been - validated in a previous processing step - """, e); + The provided PURL is invalid, even though it should have been \ + validated in a previous processing step""", e); } } } diff --git a/repository-meta-analyzer/src/main/resources/application.properties b/repository-meta-analyzer/src/main/resources/application.properties index 07a35cf17..a21db07d0 100644 --- a/repository-meta-analyzer/src/main/resources/application.properties +++ b/repository-meta-analyzer/src/main/resources/application.properties @@ -14,6 +14,8 @@ quarkus.http.port=8091 # @type: boolean quarkus.log.console.json=false +quarkus.log.console.format=%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] (%t) %s %X %e%n + # @category: Observability # @hidden quarkus.log.category."org.apache.kafka".level=WARN diff --git a/repository-meta-analyzer/src/test/java/org/dependencytrack/repometaanalyzer/RepositoryMetaAnalyzerIT.java 
b/repository-meta-analyzer/src/test/java/org/dependencytrack/repometaanalyzer/RepositoryMetaAnalyzerIT.java index da30234fe..b8ade427a 100644 --- a/repository-meta-analyzer/src/test/java/org/dependencytrack/repometaanalyzer/RepositoryMetaAnalyzerIT.java +++ b/repository-meta-analyzer/src/test/java/org/dependencytrack/repometaanalyzer/RepositoryMetaAnalyzerIT.java @@ -37,7 +37,6 @@ import org.dependencytrack.proto.KafkaProtobufSerde; import org.dependencytrack.proto.repometaanalysis.v1.AnalysisCommand; import org.dependencytrack.proto.repometaanalysis.v1.AnalysisResult; -import org.dependencytrack.proto.repometaanalysis.v1.FetchMeta; import org.eclipse.microprofile.config.ConfigProvider; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -119,7 +118,6 @@ void test() { final var command = AnalysisCommand.newBuilder() .setComponent(org.dependencytrack.proto.repometaanalysis.v1.Component.newBuilder() .setPurl("pkg:golang/github.com/acme/acme-lib@9.1.1")) - .setFetchMeta(FetchMeta.FETCH_META_LATEST_VERSION) .build(); kafkaCompanion @@ -181,7 +179,6 @@ void test() { final var command = AnalysisCommand.newBuilder() .setComponent(org.dependencytrack.proto.repometaanalysis.v1.Component.newBuilder() .setPurl("pkg:golang/github.com/acme/acme-lib@9.1.1")) - .setFetchMeta(FetchMeta.FETCH_META_INTEGRITY_DATA) .build(); kafkaCompanion @@ -423,7 +420,6 @@ void test() { final var command = AnalysisCommand.newBuilder() .setComponent(org.dependencytrack.proto.repometaanalysis.v1.Component.newBuilder() .setPurl("pkg:npm/amazon-s3-uri@0.0.1")) - .setFetchMeta(FetchMeta.FETCH_META_INTEGRITY_DATA_AND_LATEST_VERSION) .build(); kafkaCompanion @@ -493,7 +489,6 @@ void testIntegrityMetaOnly() { final var command = AnalysisCommand.newBuilder() .setComponent(org.dependencytrack.proto.repometaanalysis.v1.Component.newBuilder() .setPurl("pkg:npm/amazon-s3-uri@0.0.1")) - .setFetchMeta(FetchMeta.FETCH_META_INTEGRITY_DATA) .build(); kafkaCompanion diff --git a/repository-meta-analyzer/src/test/java/org/dependencytrack/repometaanalyzer/RepositoryMetaAnalyzerTopologyTest.java b/repository-meta-analyzer/src/test/java/org/dependencytrack/repometaanalyzer/RepositoryMetaAnalyzerTopologyTest.java index 1cfb289ba..e7f11de3d 100644 --- a/repository-meta-analyzer/src/test/java/org/dependencytrack/repometaanalyzer/RepositoryMetaAnalyzerTopologyTest.java +++ b/repository-meta-analyzer/src/test/java/org/dependencytrack/repometaanalyzer/RepositoryMetaAnalyzerTopologyTest.java @@ -39,7 +39,6 @@ import org.dependencytrack.proto.KafkaProtobufSerializer; import org.dependencytrack.proto.repometaanalysis.v1.AnalysisCommand; import org.dependencytrack.proto.repometaanalysis.v1.AnalysisResult; -import org.dependencytrack.proto.repometaanalysis.v1.FetchMeta; import org.dependencytrack.repometaanalyzer.model.IntegrityMeta; import org.dependencytrack.repometaanalyzer.model.MetaAnalyzerCacheKey; import org.dependencytrack.repometaanalyzer.model.MetaModel; @@ -98,13 +97,18 @@ void beforeEach() { .thenReturn(Optional.of(analyzerMock)); } + @AfterEach + void afterEach() { + testDriver.close(); + cache.invalidateAll().await().indefinitely(); + } + @Test void testAnalyzerCacheMiss() throws Exception { final var command = AnalysisCommand.newBuilder() .setComponent(org.dependencytrack.proto.repometaanalysis.v1.Component.newBuilder() .setPurl("pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2") .build()) - .setFetchMeta(FetchMeta.FETCH_META_INTEGRITY_DATA_AND_LATEST_VERSION) .build(); // mock repository data @@ 
-144,7 +148,6 @@ void testAnalyzerCacheHitRepoMeta() { final var command = AnalysisCommand.newBuilder() .setComponent(org.dependencytrack.proto.repometaanalysis.v1.Component.newBuilder() .setPurl("pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2")) - .setFetchMeta(FetchMeta.FETCH_META_LATEST_VERSION) .build(); // mock repository data @@ -156,18 +159,16 @@ void testAnalyzerCacheHitRepoMeta() { when(repoEntityRepositoryMock.findEnabledRepositoriesByType(any())) .thenReturn(List.of(repository)); - final var result = AnalysisResult.newBuilder() - .setComponent(command.getComponent()) - .setRepository("testRepository") - .setLatestVersion("test") - .build(); + final var cachedRepoMeta = new MetaModel(); + cachedRepoMeta.setRepositoryIdentifier("testRepository"); + cachedRepoMeta.setLatestVersion("test"); when(analyzerMock.getName()).thenReturn("testAnalyzer"); // populate the cache to hit the match final var cacheKey = new MetaAnalyzerCacheKey("testAnalyzer", "pkg:maven/com.fasterxml.jackson.core/jackson-databind", "https://repo1.maven.org/maven2/"); - cache.as(CaffeineCache.class).put(cacheKey, completedFuture(result)); + cache.as(CaffeineCache.class).put(cacheKey, completedFuture(cachedRepoMeta)); inputTopic.pipeInput("foo", command); final KeyValue record = outputTopic.readKeyValue(); @@ -182,7 +183,6 @@ void testAnalyzerCacheHitIntegrityMeta() { .setComponent(org.dependencytrack.proto.repometaanalysis.v1.Component.newBuilder() .setUuid(uuid.toString()) .setPurl("pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2")) - .setFetchMeta(FetchMeta.FETCH_META_INTEGRITY_DATA) .build(); // mock repository data @@ -194,20 +194,16 @@ void testAnalyzerCacheHitIntegrityMeta() { when(repoEntityRepositoryMock.findEnabledRepositoriesByType(any())) .thenReturn(List.of(repository)); - final var result = AnalysisResult.newBuilder() - .setComponent(command.getComponent()) - .setRepository("testRepository") - .setLatestVersion("test") - .setIntegrityMeta(org.dependencytrack.proto.repometaanalysis.v1.IntegrityMeta.newBuilder() - .setSha1("sha1").build()) - .build(); + final var cachedIntegrityMeta = new IntegrityMeta(); + cachedIntegrityMeta.setSha1("sha1"); + when(analyzerMock.getName()).thenReturn("testAnalyzer"); // populate the cache to hit the match final var cacheKey = new MetaAnalyzerCacheKey("testAnalyzer", "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2", "https://repo1.maven.org/maven2/"); - cache.as(CaffeineCache.class).put(cacheKey, completedFuture(result)); + cache.as(CaffeineCache.class).put(cacheKey, completedFuture(cachedIntegrityMeta)); inputTopic.pipeInput("foo", command); final KeyValue record = outputTopic.readKeyValue(); @@ -222,7 +218,6 @@ void testPerformMetaAnalysis() { final var command = AnalysisCommand.newBuilder() .setComponent(org.dependencytrack.proto.repometaanalysis.v1.Component.newBuilder() .setPurl("pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2")) - .setFetchMeta(FetchMeta.FETCH_META_INTEGRITY_DATA_AND_LATEST_VERSION) .build(); inputTopic.pipeInput("foo", command); @@ -235,7 +230,6 @@ void testPerformMetaAnalysis() { void testNoPurlComponent() { final var command = AnalysisCommand.newBuilder() .setComponent(org.dependencytrack.proto.repometaanalysis.v1.Component.newBuilder()) - .setFetchMeta(FetchMeta.FETCH_META_INTEGRITY_DATA_AND_LATEST_VERSION) .build(); inputTopic.pipeInput("foo", command); @@ -260,10 +254,5 @@ void testMetaOutput() { Assertions.assertFalse(record.value.hasIntegrityMeta()); } - @AfterEach - void afterEach() { 
- testDriver.close(); - cache.invalidateAll(); - } } diff --git a/repository-meta-analyzer/src/test/java/org/dependencytrack/repometaanalyzer/processor/MetaAnalyzerProcessorTest.java b/repository-meta-analyzer/src/test/java/org/dependencytrack/repometaanalyzer/processor/MetaAnalyzerProcessorTest.java index cd47917dd..87eb494cf 100644 --- a/repository-meta-analyzer/src/test/java/org/dependencytrack/repometaanalyzer/processor/MetaAnalyzerProcessorTest.java +++ b/repository-meta-analyzer/src/test/java/org/dependencytrack/repometaanalyzer/processor/MetaAnalyzerProcessorTest.java @@ -29,8 +29,6 @@ import io.quarkus.test.junit.QuarkusTest; import io.quarkus.test.junit.QuarkusTestProfile; import io.quarkus.test.junit.TestProfile; -import jakarta.inject.Inject; -import jakarta.persistence.EntityManager; import org.apache.http.HttpHeaders; import org.apache.http.HttpStatus; import org.apache.kafka.streams.StreamsBuilder; @@ -48,7 +46,6 @@ import org.dependencytrack.proto.repometaanalysis.v1.AnalysisCommand; import org.dependencytrack.proto.repometaanalysis.v1.AnalysisResult; import org.dependencytrack.proto.repometaanalysis.v1.Component; -import org.dependencytrack.proto.repometaanalysis.v1.FetchMeta; import org.dependencytrack.repometaanalyzer.repositories.RepositoryAnalyzerFactory; import org.dependencytrack.repometaanalyzer.serde.KafkaPurlSerde; import org.junit.jupiter.api.AfterEach; @@ -56,6 +53,8 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import jakarta.inject.Inject; +import jakarta.persistence.EntityManager; import java.util.Map; import java.util.UUID; @@ -225,7 +224,7 @@ void testRepoMetaWithIntegrityMetaWithAuth() throws Exception { { "latest": "v6.6.6" } - """.getBytes(), + """.getBytes(), new ContentTypeHeader("application/json"))).withStatus(HttpStatus.SC_OK))); wireMockServer1.stubFor(head(urlPathEqualTo("/@apollo/federation/-/@apollo/federation-0.19.1.tgz")) @@ -241,7 +240,7 @@ void testRepoMetaWithIntegrityMetaWithAuth() throws Exception { .setPurl("pkg:npm/@apollo/federation@0.19.1") .setUuid(uuid.toString()) .setInternal(true)) - .setFetchMeta(FetchMeta.FETCH_META_INTEGRITY_DATA_AND_LATEST_VERSION).build()); + .build()); inputTopic.pipeInput(inputRecord); assertThat(outputTopic.getQueueSize()).isEqualTo(1); @@ -263,61 +262,6 @@ record -> { } - @Test - @TestTransaction - void testDifferentSourcesForRepoMeta() throws Exception { - entityManager.createNativeQuery(""" - INSERT INTO "REPOSITORY" ("TYPE", "ENABLED","IDENTIFIER", "INTERNAL", "URL", "AUTHENTICATIONREQUIRED", "RESOLUTION_ORDER") VALUES - ('NPM', true, 'central', true, :url1, false, 1), - ('NPM', true, 'internal', true, :url2, false, 2); - """) - .setParameter("url1", String.format("http://localhost:%d", wireMockServer1.port())) - .setParameter("url2", String.format("http://localhost:%d", wireMockServer2.port())) - .executeUpdate(); - wireMockServer1.stubFor(get(urlPathEqualTo("/-/package/%40apollo%2Ffederation/dist-tags")) - .willReturn(aResponse().withHeader(HttpHeaders.CONTENT_TYPE, "application/json") - .withResponseBody(Body.ofBinaryOrText(""" - { - "type": "version" - } - """.getBytes(), - new ContentTypeHeader("application/json"))) - .withStatus(HttpStatus.SC_OK))); - - wireMockServer2.stubFor(get(urlPathEqualTo("/-/package/%40apollo%2Ffederation/dist-tags")) - .willReturn(aResponse().withHeader(HttpHeaders.CONTENT_TYPE, "application/json") - .withResponseBody(Body.ofBinaryOrText(""" - { - "latest": "v6.6.6" - } - """.getBytes(), - new ContentTypeHeader("application/json"))) - 
.withStatus(HttpStatus.SC_OK))); - UUID uuid = UUID.randomUUID(); - final TestRecord inputRecord = new TestRecord<>(new PackageURL("pkg:npm/@apollo/federation@0.19.1"), - AnalysisCommand.newBuilder() - .setComponent(Component.newBuilder() - .setPurl("pkg:npm/@apollo/federation@0.19.1") - .setUuid(uuid.toString()) - .setInternal(true)) - .setFetchMeta(FetchMeta.FETCH_META_LATEST_VERSION).build()); - - inputTopic.pipeInput(inputRecord); - assertThat(outputTopic.getQueueSize()).isEqualTo(1); - assertThat(outputTopic.readRecordsToList()).satisfiesExactly( - record -> { - assertThat(record.key().getType()).isEqualTo(RepositoryType.NPM.toString().toLowerCase()); - assertThat(record.value()).isNotNull(); - final AnalysisResult result = record.value(); - assertThat(result.hasComponent()).isTrue(); - assertThat(result.getComponent().getUuid()).isEqualTo(uuid.toString()); - assertThat(result.getRepository()).isEqualTo("internal"); - assertThat(result.getLatestVersion()).isEqualTo("v6.6.6"); - assertThat(result.hasPublished()).isFalse(); - }); - - } - @Test @TestTransaction void testDifferentSourcesForRepoAndIntegrityMeta() throws Exception { @@ -334,7 +278,7 @@ void testDifferentSourcesForRepoAndIntegrityMeta() throws Exception { .withResponseBody(Body.ofBinaryOrText(""" { } - """.getBytes(), + """.getBytes(), new ContentTypeHeader("application/json"))) .withStatus(HttpStatus.SC_OK))); @@ -349,7 +293,7 @@ void testDifferentSourcesForRepoAndIntegrityMeta() throws Exception { { "latest": "v6.6.6" } - """.getBytes(), + """.getBytes(), new ContentTypeHeader("application/json"))) .withStatus(HttpStatus.SC_OK))); UUID uuid = UUID.randomUUID(); @@ -359,7 +303,7 @@ void testDifferentSourcesForRepoAndIntegrityMeta() throws Exception { .setPurl("pkg:npm/@apollo/federation@0.19.1") .setUuid(uuid.toString()) .setInternal(true)) - .setFetchMeta(FetchMeta.FETCH_META_INTEGRITY_DATA_AND_LATEST_VERSION).build()); + .build()); inputTopic.pipeInput(inputRecord); assertThat(outputTopic.getQueueSize()).isEqualTo(1);
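Since the tests above no longer set fetch_meta, the request/response contract is reduced to: send a component, receive whatever latest-version and integrity data could be resolved. A minimal sketch of that contract, assuming the Protobuf classes generated from the messages at the top of this patch; AnalysisClientSketch and its methods are illustrative only and not part of the change:

import org.dependencytrack.proto.repometaanalysis.v1.AnalysisCommand;
import org.dependencytrack.proto.repometaanalysis.v1.AnalysisResult;
import org.dependencytrack.proto.repometaanalysis.v1.Component;

class AnalysisClientSketch {

    // The command only carries the component; the analyzer now always attempts
    // both the latest-version and the integrity-metadata lookup.
    static AnalysisCommand buildCommand(final String purl) {
        return AnalysisCommand.newBuilder()
                .setComponent(Component.newBuilder().setPurl(purl))
                .build();
    }

    // Both kinds of metadata are optional in the result; their absence means no
    // applicable repository could provide them.
    static void handleResult(final AnalysisResult result) {
        if (!result.getLatestVersion().isEmpty()) {
            System.out.println("latest version " + result.getLatestVersion()
                    + " from repository " + result.getRepository());
        }
        if (result.hasIntegrityMeta()) {
            System.out.println("sha1 " + result.getIntegrityMeta().getSha1());
        }
    }
}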