diff --git a/minecode/model_utils.py b/minecode/model_utils.py index 97e875a8..6218ef26 100644 --- a/minecode/model_utils.py +++ b/minecode/model_utils.py @@ -44,7 +44,7 @@ ) -def add_package_to_scan_queue(package, pipelines=DEFAULT_PIPELINES, reindex_uri=False, priority=100): +def add_package_to_scan_queue(package, pipelines=DEFAULT_PIPELINES, priority=0, reindex_uri=False): """ Add a Package `package` to the scan queue to run the list of provided `pipelines` @@ -226,7 +226,7 @@ def merge_or_create_package(scanned_package, visit_level, override=False): If ``scanned_package`` does not exist in the PackageDB, create a new entry in the PackageDB for ``scanned_package``. - + If ``override`` is True, then all existing empty values of the PackageDB package are replaced by a non-empty value of the provided override. """ diff --git a/minecode/visitors/conan.py b/minecode/visitors/conan.py index dacbe206..1a7cd9a7 100644 --- a/minecode/visitors/conan.py +++ b/minecode/visitors/conan.py @@ -99,7 +99,7 @@ def get_download_info(conandata, version): return download_url, sha256 -def map_conan_package(package_url, pipelines): +def map_conan_package(package_url, pipelines, priority): """ Add a conan `package_url` to the PackageDB. @@ -134,7 +134,7 @@ def map_conan_package(package_url, pipelines): # Submit package for scanning if db_package: - add_package_to_scan_queue(db_package, pipelines) + add_package_to_scan_queue(db_package, pipelines, priority) return error @@ -154,11 +154,12 @@ def process_request(purl_str, **kwargs): package_url = PackageURL.from_string(purl_str) addon_pipelines = kwargs.get('addon_pipelines', []) pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + priority = kwargs.get('priority', 0) if not package_url.version: return - error_msg = map_conan_package(package_url, pipelines) + error_msg = map_conan_package(package_url, pipelines, priority) if error_msg: return error_msg diff --git a/minecode/visitors/debian.py b/minecode/visitors/debian.py index e0690cdd..28e0a778 100644 --- a/minecode/visitors/debian.py +++ b/minecode/visitors/debian.py @@ -337,6 +337,7 @@ def process_request(purl_str, **kwargs): source_purl = kwargs.get("source_purl", None) addon_pipelines = kwargs.get('addon_pipelines', []) pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + priority = kwargs.get('priority', 0) try: package_url = PackageURL.from_string(purl_str) @@ -351,15 +352,16 @@ def process_request(purl_str, **kwargs): has_version = bool(package_url.version) if has_version: error = map_debian_metadata_binary_and_source( - package_url=package_url, + package_url=package_url, source_package_url=source_package_url, pipelines=pipelines, + priority=priority, ) return error -def map_debian_package(debian_package, package_content, pipelines): +def map_debian_package(debian_package, package_content, pipelines, priority): """ Add a debian `package_url` to the PackageDB. @@ -372,7 +374,7 @@ def map_debian_package(debian_package, package_content, pipelines): error = '' purl = debian_package.package_url - if package_content == PackageContentType.BINARY: + if package_content == PackageContentType.BINARY: download_url = debian_package.binary_archive_url elif package_content == PackageContentType.SOURCE_ARCHIVE: download_url = debian_package.source_archive_url @@ -427,7 +429,7 @@ def map_debian_package(debian_package, package_content, pipelines): # Submit package for scanning if db_package: - add_package_to_scan_queue(db_package, pipelines) + add_package_to_scan_queue(db_package, pipelines, priority) return db_package, error @@ -507,13 +509,13 @@ def update_license_copyright_fields(package_from, package_to, replace=True): setattr(package_to, field, value) -def map_debian_metadata_binary_and_source(package_url, source_package_url, pipelines): +def map_debian_metadata_binary_and_source(package_url, source_package_url, pipelines, priority): """ Get metadata for the binary and source release of the Debian package `package_url` and save it to the PackageDB. Return an error string for errors that occur, or empty string if there is no error. - """ + """ error = '' if "repository_url" in package_url.qualifiers: @@ -522,7 +524,7 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url, pipel base_url = UBUNTU_BASE_URL else: base_url = DEBIAN_BASE_URL - + if "api_data_url" in package_url.qualifiers: metadata_base_url = package_url.qualifiers["api_data_url"] elif package_url.namespace == 'ubuntu': @@ -544,6 +546,7 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url, pipel debian_package, PackageContentType.BINARY, pipelines, + priority, ) if emsg: error += emsg @@ -552,7 +555,8 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url, pipel source_package, emsg = map_debian_package( debian_package, PackageContentType.SOURCE_ARCHIVE, - pipelines, + pipelines, + priority, ) if emsg: error += emsg @@ -594,7 +598,7 @@ def from_purls(cls, package_urls): def package_archive_version(self): """ Get the useful part of the debian package version used in - source, binary, metadata and copyright URLs optionally. + source, binary, metadata and copyright URLs optionally. """ debvers = DebVersion.from_string(self.package_url.version) if debvers.revision != "0": @@ -679,7 +683,7 @@ def package_copyright_url(self): copyright_file_string = "_copyright" if self.package_url.namespace == "ubuntu": copyright_file_string = "/copyright" - + metadata_version = self.package_archive_version if not self.source_package_url: metadata_package_name = self.package_url.name diff --git a/minecode/visitors/generic.py b/minecode/visitors/generic.py index 4eb044dd..e56117ff 100644 --- a/minecode/visitors/generic.py +++ b/minecode/visitors/generic.py @@ -27,7 +27,7 @@ logger.setLevel(logging.INFO) -def map_generic_package(package_url, pipelines): +def map_generic_package(package_url, pipelines, priority): """ Add a generic `package_url` to the PackageDB. @@ -52,7 +52,11 @@ def map_generic_package(package_url, pipelines): # Submit package for scanning if db_package: - add_package_to_scan_queue(db_package, pipelines) + add_package_to_scan_queue( + package=db_package, + pipelines=pipelines, + priority=priority, + ) return error @@ -67,6 +71,7 @@ def process_request(purl_str, **kwargs): addon_pipelines = kwargs.get('addon_pipelines', []) pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + priority = kwargs.get('priority', 0) try: package_url = PackageURL.from_string(purl_str) @@ -79,7 +84,7 @@ def process_request(purl_str, **kwargs): error = f'package_url {purl_str} does not contain a download_url qualifier' return error - error_msg = map_generic_package(package_url, pipelines) + error_msg = map_generic_package(package_url, pipelines, priority) if error_msg: return error_msg @@ -97,7 +102,7 @@ def packagedata_from_dict(package_data): return PackageData.from_data(cleaned_package_data) -def map_fetchcode_supported_package(package_url, pipelines): +def map_fetchcode_supported_package(package_url, pipelines, priority): """ Add a `package_url` supported by fetchcode to the PackageDB. @@ -122,7 +127,11 @@ def map_fetchcode_supported_package(package_url, pipelines): # Submit package for scanning if db_package: - add_package_to_scan_queue(db_package, pipelines) + add_package_to_scan_queue( + package=db_package, + pipelines=pipelines, + priority=priority, + ) return error @@ -176,6 +185,7 @@ def process_request_fetchcode_generic(purl_str, **kwargs): addon_pipelines = kwargs.get('addon_pipelines', []) pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + priority = kwargs.get('priority', 0) try: package_url = PackageURL.from_string(purl_str) @@ -183,7 +193,7 @@ def process_request_fetchcode_generic(purl_str, **kwargs): error = f"error occurred when parsing {purl_str}: {e}" return error - error_msg = map_fetchcode_supported_package(package_url, pipelines) + error_msg = map_fetchcode_supported_package(package_url, pipelines, priority) if error_msg: return error_msg diff --git a/minecode/visitors/github.py b/minecode/visitors/github.py index f439b041..b647601e 100644 --- a/minecode/visitors/github.py +++ b/minecode/visitors/github.py @@ -198,13 +198,15 @@ def process_request_dir_listed(purl_str, **kwargs): addon_pipelines = kwargs.get('addon_pipelines', []) pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + priority = kwargs.get('priority', 0) + try: package_url = PackageURL.from_string(purl_str) except ValueError as e: error = f"error occurred when parsing {purl_str}: {e}" return error - error_msg = map_fetchcode_supported_package(package_url, pipelines) + error_msg = map_fetchcode_supported_package(package_url, pipelines, priority) if error_msg: return error_msg diff --git a/minecode/visitors/gnu.py b/minecode/visitors/gnu.py index bf1f541a..0f269703 100644 --- a/minecode/visitors/gnu.py +++ b/minecode/visitors/gnu.py @@ -35,12 +35,13 @@ def process_request(purl_str, **kwargs): addon_pipelines = kwargs.get('addon_pipelines', []) pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + priority = kwargs.get('priority', 0) package_url = PackageURL.from_string(purl_str) if not package_url.version: return - error_msg = map_fetchcode_supported_package(package_url, pipelines) + error_msg = map_fetchcode_supported_package(package_url, pipelines, priority) if error_msg: return error_msg diff --git a/minecode/visitors/maven.py b/minecode/visitors/maven.py index 5d7a1133..d5d8799c 100644 --- a/minecode/visitors/maven.py +++ b/minecode/visitors/maven.py @@ -242,12 +242,12 @@ def merge_ancestors(ancestor_pom_texts, package): return package -def map_maven_package(package_url, package_content, pipelines, reindex_metadata=False): +def map_maven_package(package_url, package_content, pipelines, priority=0, reindex_metadata=False): """ Add a maven `package_url` to the PackageDB. Return an error string if errors have occured in the process. - + if ``reindex_metadata`` is True, only reindex metadata and DO NOT rescan the full package. """ from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package @@ -315,16 +315,20 @@ def map_maven_package(package_url, package_content, pipelines, reindex_metadata= msg = f'Failed to retrieve JAR: {package_url}' error += msg + '\n' logger.error(msg) - - if not reindex_metadata: + + if not reindex_metadata: # Submit package for scanning if db_package: - add_package_to_scan_queue(package=db_package, pipelines=pipelines) + add_package_to_scan_queue( + package=db_package, + pipelines=pipelines, + priority=priority + ) return db_package, error -def map_maven_binary_and_source(package_url, pipelines, reindex_metadata=False): +def map_maven_binary_and_source(package_url, pipelines, priority=0, reindex_metadata=False): """ Get metadata for the binary and source release of the Maven package `package_url` and save it to the PackageDB. @@ -336,6 +340,7 @@ def map_maven_binary_and_source(package_url, pipelines, reindex_metadata=False): package_url=package_url, package_content=PackageContentType.BINARY, pipelines=pipelines, + priority=priority, reindex_metadata=reindex_metadata, ) if emsg: @@ -347,6 +352,7 @@ def map_maven_binary_and_source(package_url, pipelines, reindex_metadata=False): package_url=source_package_url, package_content=PackageContentType.SOURCE_ARCHIVE, pipelines=pipelines, + priority=priority, reindex_metadata=reindex_metadata, ) if emsg: @@ -433,7 +439,7 @@ def process_request(purl_str, **kwargs): addon_pipelines = kwargs.get('addon_pipelines', []) pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) - + priority = kwargs.get('priority', 0) try: package_url = PackageURL.from_string(purl_str) @@ -444,7 +450,12 @@ def process_request(purl_str, **kwargs): has_version = bool(package_url.version) if has_version: reindex_metadata=kwargs.get("reindex_metadata", False) - error = map_maven_binary_and_source(package_url, pipelines, reindex_metadata=reindex_metadata) + error = map_maven_binary_and_source( + package_url, + pipelines, + reindex_metadata=reindex_metadata, + priority=priority, + ) else: error = map_maven_packages(package_url, pipelines) diff --git a/minecode/visitors/npm.py b/minecode/visitors/npm.py index 5906038a..e29207b9 100644 --- a/minecode/visitors/npm.py +++ b/minecode/visitors/npm.py @@ -127,7 +127,7 @@ def get_package_json(namespace, name, version): logger.error(f"HTTP error occurred: {err}") -def map_npm_package(package_url, pipelines): +def map_npm_package(package_url, pipelines, priority): """ Add a npm `package_url` to the PackageDB. @@ -156,7 +156,11 @@ def map_npm_package(package_url, pipelines): # Submit package for scanning if db_package: - add_package_to_scan_queue(db_package, pipelines) + add_package_to_scan_queue( + package=db_package, + pipelines=pipelines, + priority=priority + ) return error @@ -172,15 +176,16 @@ def process_request(purl_str, **kwargs): scan queue afterwards. """ from minecode.model_utils import DEFAULT_PIPELINES - + addon_pipelines = kwargs.get('addon_pipelines', []) pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + priority = kwargs.get('priority', 0) package_url = PackageURL.from_string(purl_str) if not package_url.version: return - error_msg = map_npm_package(package_url, pipelines) + error_msg = map_npm_package(package_url, pipelines, priority) if error_msg: return error_msg diff --git a/minecode/visitors/openssl.py b/minecode/visitors/openssl.py index 1993c1fa..c448af36 100644 --- a/minecode/visitors/openssl.py +++ b/minecode/visitors/openssl.py @@ -91,6 +91,7 @@ def get_uris(self, content): else: yield URI(uri=url, source_uri=self.uri, date=date, size=size) + # Indexing OpenSSL PURLs requires a GitHub API token. # Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. @priority_router.route('pkg:openssl/openssl@.*') @@ -104,9 +105,10 @@ def process_request_dir_listed(purl_str, **kwargs): PackageDB entry. The package is then added to the scan queue afterwards. """ from minecode.model_utils import DEFAULT_PIPELINES - + addon_pipelines = kwargs.get('addon_pipelines', []) pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + priority = kwargs.get('priority', 0) try: package_url = PackageURL.from_string(purl_str) @@ -114,7 +116,7 @@ def process_request_dir_listed(purl_str, **kwargs): error = f"error occurred when parsing {purl_str}: {e}" return error - error_msg = map_fetchcode_supported_package(package_url, pipelines) + error_msg = map_fetchcode_supported_package(package_url, pipelines, priority) if error_msg: return error_msg \ No newline at end of file diff --git a/packagedb/api.py b/packagedb/api.py index 6fa9b6e3..f516db73 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -783,7 +783,7 @@ class CollectViewSet(viewsets.ViewSet): like with several Debian packages. **Examples::** - + /api/collect/?purl=pkg:npm/foo@0.0.7&addon_pipelines=collect_symbols_ctags /api/collect/?purl=pkg:generic/busybox@1.36.1&addon_pipelines=collect_symbols_ctags&addon_pipelines=inspect_elf_binaries @@ -818,6 +818,10 @@ def list(self, request, format=None): purl = validated_data.get('purl') kwargs = dict() + # We want this request to have high priority since the user knows the + # exact package they want + kwargs['priority'] = 100 + if source_purl := validated_data.get('source_purl', None): kwargs["source_purl"] = source_purl @@ -1048,9 +1052,9 @@ def reindex_metadata(self, request, *args, **kwargs): If the package does not exist in the database this call does nothing. NOTE: this WILL NOT re-run scan and indexing in the background in contrast with the /collect and collect/index_packages endpoints. - + **Request example**:: - + /api/collect/reindex_metadata/?purl=pkg:npm/foo@0.0.7 """ @@ -1068,10 +1072,10 @@ def reindex_metadata(self, request, *args, **kwargs): packages = Package.objects.filter(**lookups) if packages.count() == 0: return Response( - {'status': f'Not recollecting: Package does not exist for {purl}'}, + {'status': f'Not recollecting: Package does not exist for {purl}'}, status=status.HTTP_400_BAD_REQUEST, ) - + # Pass to only reindex_metadata downstream kwargs["reindex_metadata"] = True # here we have a package(s) matching our purl and we want to recollect metadata live