Skip to content

Commit

Permalink
Set package scan priority from collect to 100 #502
Browse files Browse the repository at this point in the history
Signed-off-by: Jono Yang <[email protected]>
  • Loading branch information
JonoYang committed Jul 17, 2024
1 parent 275d6da commit 4fc7923
Show file tree
Hide file tree
Showing 10 changed files with 82 additions and 42 deletions.
4 changes: 2 additions & 2 deletions minecode/model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
)


def add_package_to_scan_queue(package, pipelines=DEFAULT_PIPELINES, reindex_uri=False, priority=100):
def add_package_to_scan_queue(package, pipelines=DEFAULT_PIPELINES, priority=0, reindex_uri=False):
"""
Add a Package `package` to the scan queue to run the list of provided `pipelines`
Expand Down Expand Up @@ -226,7 +226,7 @@ def merge_or_create_package(scanned_package, visit_level, override=False):
If ``scanned_package`` does not exist in the PackageDB, create a new entry in
the PackageDB for ``scanned_package``.
If ``override`` is True, then all existing empty values of the PackageDB package are replaced by
a non-empty value of the provided override.
"""
Expand Down
7 changes: 4 additions & 3 deletions minecode/visitors/conan.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def get_download_info(conandata, version):
return download_url, sha256


def map_conan_package(package_url, pipelines):
def map_conan_package(package_url, pipelines, priority):
"""
Add a conan `package_url` to the PackageDB.
Expand Down Expand Up @@ -134,7 +134,7 @@ def map_conan_package(package_url, pipelines):

# Submit package for scanning
if db_package:
add_package_to_scan_queue(db_package, pipelines)
add_package_to_scan_queue(db_package, pipelines, priority)

return error

Expand All @@ -154,11 +154,12 @@ def process_request(purl_str, **kwargs):
package_url = PackageURL.from_string(purl_str)
addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
priority = kwargs.get('priority', 0)

if not package_url.version:
return

error_msg = map_conan_package(package_url, pipelines)
error_msg = map_conan_package(package_url, pipelines, priority)

if error_msg:
return error_msg
24 changes: 14 additions & 10 deletions minecode/visitors/debian.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ def process_request(purl_str, **kwargs):
source_purl = kwargs.get("source_purl", None)
addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
priority = kwargs.get('priority', 0)

try:
package_url = PackageURL.from_string(purl_str)
Expand All @@ -351,15 +352,16 @@ def process_request(purl_str, **kwargs):
has_version = bool(package_url.version)
if has_version:
error = map_debian_metadata_binary_and_source(
package_url=package_url,
package_url=package_url,
source_package_url=source_package_url,
pipelines=pipelines,
priority=priority,
)

return error


def map_debian_package(debian_package, package_content, pipelines):
def map_debian_package(debian_package, package_content, pipelines, priority):
"""
Add a debian `package_url` to the PackageDB.
Expand All @@ -372,7 +374,7 @@ def map_debian_package(debian_package, package_content, pipelines):
error = ''

purl = debian_package.package_url
if package_content == PackageContentType.BINARY:
if package_content == PackageContentType.BINARY:
download_url = debian_package.binary_archive_url
elif package_content == PackageContentType.SOURCE_ARCHIVE:
download_url = debian_package.source_archive_url
Expand Down Expand Up @@ -427,7 +429,7 @@ def map_debian_package(debian_package, package_content, pipelines):

# Submit package for scanning
if db_package:
add_package_to_scan_queue(db_package, pipelines)
add_package_to_scan_queue(db_package, pipelines, priority)

return db_package, error

Expand Down Expand Up @@ -507,13 +509,13 @@ def update_license_copyright_fields(package_from, package_to, replace=True):
setattr(package_to, field, value)


def map_debian_metadata_binary_and_source(package_url, source_package_url, pipelines):
def map_debian_metadata_binary_and_source(package_url, source_package_url, pipelines, priority):
"""
Get metadata for the binary and source release of the Debian package
`package_url` and save it to the PackageDB.
Return an error string for errors that occur, or empty string if there is no error.
"""
"""
error = ''

if "repository_url" in package_url.qualifiers:
Expand All @@ -522,7 +524,7 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url, pipel
base_url = UBUNTU_BASE_URL
else:
base_url = DEBIAN_BASE_URL

if "api_data_url" in package_url.qualifiers:
metadata_base_url = package_url.qualifiers["api_data_url"]
elif package_url.namespace == 'ubuntu':
Expand All @@ -544,6 +546,7 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url, pipel
debian_package,
PackageContentType.BINARY,
pipelines,
priority,
)
if emsg:
error += emsg
Expand All @@ -552,7 +555,8 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url, pipel
source_package, emsg = map_debian_package(
debian_package,
PackageContentType.SOURCE_ARCHIVE,
pipelines,
pipelines,
priority,
)
if emsg:
error += emsg
Expand Down Expand Up @@ -594,7 +598,7 @@ def from_purls(cls, package_urls):
def package_archive_version(self):
"""
Get the useful part of the debian package version used in
source, binary, metadata and copyright URLs optionally.
source, binary, metadata and copyright URLs optionally.
"""
debvers = DebVersion.from_string(self.package_url.version)
if debvers.revision != "0":
Expand Down Expand Up @@ -679,7 +683,7 @@ def package_copyright_url(self):
copyright_file_string = "_copyright"
if self.package_url.namespace == "ubuntu":
copyright_file_string = "/copyright"

metadata_version = self.package_archive_version
if not self.source_package_url:
metadata_package_name = self.package_url.name
Expand Down
22 changes: 16 additions & 6 deletions minecode/visitors/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
logger.setLevel(logging.INFO)


def map_generic_package(package_url, pipelines):
def map_generic_package(package_url, pipelines, priority):
"""
Add a generic `package_url` to the PackageDB.
Expand All @@ -52,7 +52,11 @@ def map_generic_package(package_url, pipelines):

# Submit package for scanning
if db_package:
add_package_to_scan_queue(db_package, pipelines)
add_package_to_scan_queue(
package=db_package,
pipelines=pipelines,
priority=priority,
)

return error

Expand All @@ -67,6 +71,7 @@ def process_request(purl_str, **kwargs):

addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
priority = kwargs.get('priority', 0)

try:
package_url = PackageURL.from_string(purl_str)
Expand All @@ -79,7 +84,7 @@ def process_request(purl_str, **kwargs):
error = f'package_url {purl_str} does not contain a download_url qualifier'
return error

error_msg = map_generic_package(package_url, pipelines)
error_msg = map_generic_package(package_url, pipelines, priority)

if error_msg:
return error_msg
Expand All @@ -97,7 +102,7 @@ def packagedata_from_dict(package_data):
return PackageData.from_data(cleaned_package_data)


def map_fetchcode_supported_package(package_url, pipelines):
def map_fetchcode_supported_package(package_url, pipelines, priority):
"""
Add a `package_url` supported by fetchcode to the PackageDB.
Expand All @@ -122,7 +127,11 @@ def map_fetchcode_supported_package(package_url, pipelines):

# Submit package for scanning
if db_package:
add_package_to_scan_queue(db_package, pipelines)
add_package_to_scan_queue(
package=db_package,
pipelines=pipelines,
priority=priority,
)

return error

Expand Down Expand Up @@ -176,14 +185,15 @@ def process_request_fetchcode_generic(purl_str, **kwargs):

addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
priority = kwargs.get('priority', 0)

try:
package_url = PackageURL.from_string(purl_str)
except ValueError as e:
error = f"error occurred when parsing {purl_str}: {e}"
return error

error_msg = map_fetchcode_supported_package(package_url, pipelines)
error_msg = map_fetchcode_supported_package(package_url, pipelines, priority)

if error_msg:
return error_msg
4 changes: 3 additions & 1 deletion minecode/visitors/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,13 +198,15 @@ def process_request_dir_listed(purl_str, **kwargs):

addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
priority = kwargs.get('priority', 0)

try:
package_url = PackageURL.from_string(purl_str)
except ValueError as e:
error = f"error occurred when parsing {purl_str}: {e}"
return error

error_msg = map_fetchcode_supported_package(package_url, pipelines)
error_msg = map_fetchcode_supported_package(package_url, pipelines, priority)

if error_msg:
return error_msg
3 changes: 2 additions & 1 deletion minecode/visitors/gnu.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,13 @@ def process_request(purl_str, **kwargs):

addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
priority = kwargs.get('priority', 0)

package_url = PackageURL.from_string(purl_str)
if not package_url.version:
return

error_msg = map_fetchcode_supported_package(package_url, pipelines)
error_msg = map_fetchcode_supported_package(package_url, pipelines, priority)

if error_msg:
return error_msg
27 changes: 19 additions & 8 deletions minecode/visitors/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,12 +242,12 @@ def merge_ancestors(ancestor_pom_texts, package):
return package


def map_maven_package(package_url, package_content, pipelines, reindex_metadata=False):
def map_maven_package(package_url, package_content, pipelines, priority=0, reindex_metadata=False):
"""
Add a maven `package_url` to the PackageDB.
Return an error string if errors have occurred in the process.
If ``reindex_metadata`` is True, only reindex metadata and DO NOT rescan the full package.
"""
from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package
Expand Down Expand Up @@ -315,16 +315,20 @@ def map_maven_package(package_url, package_content, pipelines, reindex_metadata=
msg = f'Failed to retrieve JAR: {package_url}'
error += msg + '\n'
logger.error(msg)
if not reindex_metadata:

if not reindex_metadata:
# Submit package for scanning
if db_package:
add_package_to_scan_queue(package=db_package, pipelines=pipelines)
add_package_to_scan_queue(
package=db_package,
pipelines=pipelines,
priority=priority
)

return db_package, error


def map_maven_binary_and_source(package_url, pipelines, reindex_metadata=False):
def map_maven_binary_and_source(package_url, pipelines, priority=0, reindex_metadata=False):
"""
Get metadata for the binary and source release of the Maven package
`package_url` and save it to the PackageDB.
Expand All @@ -336,6 +340,7 @@ def map_maven_binary_and_source(package_url, pipelines, reindex_metadata=False):
package_url=package_url,
package_content=PackageContentType.BINARY,
pipelines=pipelines,
priority=priority,
reindex_metadata=reindex_metadata,
)
if emsg:
Expand All @@ -347,6 +352,7 @@ def map_maven_binary_and_source(package_url, pipelines, reindex_metadata=False):
package_url=source_package_url,
package_content=PackageContentType.SOURCE_ARCHIVE,
pipelines=pipelines,
priority=priority,
reindex_metadata=reindex_metadata,
)
if emsg:
Expand Down Expand Up @@ -433,7 +439,7 @@ def process_request(purl_str, **kwargs):

addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)

priority = kwargs.get('priority', 0)

try:
package_url = PackageURL.from_string(purl_str)
Expand All @@ -444,7 +450,12 @@ def process_request(purl_str, **kwargs):
has_version = bool(package_url.version)
if has_version:
reindex_metadata=kwargs.get("reindex_metadata", False)
error = map_maven_binary_and_source(package_url, pipelines, reindex_metadata=reindex_metadata)
error = map_maven_binary_and_source(
package_url,
pipelines,
reindex_metadata=reindex_metadata,
priority=priority,
)
else:
error = map_maven_packages(package_url, pipelines)

Expand Down
13 changes: 9 additions & 4 deletions minecode/visitors/npm.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def get_package_json(namespace, name, version):
logger.error(f"HTTP error occurred: {err}")


def map_npm_package(package_url, pipelines):
def map_npm_package(package_url, pipelines, priority):
"""
Add a npm `package_url` to the PackageDB.
Expand Down Expand Up @@ -156,7 +156,11 @@ def map_npm_package(package_url, pipelines):

# Submit package for scanning
if db_package:
add_package_to_scan_queue(db_package, pipelines)
add_package_to_scan_queue(
package=db_package,
pipelines=pipelines,
priority=priority
)

return error

Expand All @@ -172,15 +176,16 @@ def process_request(purl_str, **kwargs):
scan queue afterwards.
"""
from minecode.model_utils import DEFAULT_PIPELINES

addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
priority = kwargs.get('priority', 0)

package_url = PackageURL.from_string(purl_str)
if not package_url.version:
return

error_msg = map_npm_package(package_url, pipelines)
error_msg = map_npm_package(package_url, pipelines, priority)

if error_msg:
return error_msg
Loading

0 comments on commit 4fc7923

Please sign in to comment.