Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CWE support in multiple importers #1526

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions vulnerabilities/importers/apache_httpd.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#

import logging
import re
import urllib

import requests
Expand All @@ -23,6 +24,8 @@
from vulnerabilities.importer import Reference
from vulnerabilities.importer import VulnerabilitySeverity
from vulnerabilities.severity_systems import APACHE_HTTPD
from vulnerabilities.utils import create_weaknesses_list
from vulnerabilities.utils import cwe_regex
from vulnerabilities.utils import get_item

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -102,11 +105,14 @@ def to_advisory(self, data):
)
)

weaknesses = get_weaknesses(data)

return AdvisoryData(
aliases=[alias],
summary=description or "",
affected_packages=affected_packages,
references=[reference],
weaknesses=weaknesses,
url=reference.url,
)

Expand Down Expand Up @@ -152,3 +158,97 @@ def fetch_links(url):
continue
links.append(urllib.parse.urljoin(url, link))
return links


def get_weaknesses(cve_data):
    """
    Return the list of CWE IDs found in an Apache HTTPD CVE record.

    Two record layouts are handled:

    - Records that carry a ``CVE_data_meta.ID`` entry (CVE JSON 4.0 style):
      every ``problemtype.problemtype_data[].description[].value`` text is
      searched with ``cwe_regex``.
    - All other records (CVE JSON 5.0 style): the ``cweId`` of every
      ``containers.cna.problemTypes[].descriptions[]`` entry is collected.
      All ``problemTypes`` entries are walked, not only the first one.

    Args:
        cve_data (dict): The CVE data in a dictionary format.

    Returns:
        list: The result of ``create_weaknesses_list`` applied to the
        collected CWE strings (integers such as ``[125]`` in the examples).

    Examples:
        >>> mock_cve_data1 = {
        ...     "containers": {
        ...         "cna": {
        ...             "providerMetadata": {
        ...                 "orgId": "f0158376-9dc2-43b6-827c-5f631a4d8d09"
        ...             },
        ...             "title": "mod_macro buffer over-read",
        ...             "problemTypes": [
        ...                 {
        ...                     "descriptions": [
        ...                         {
        ...                             "description": "CWE-125 Out-of-bounds Read",
        ...                             "lang": "en",
        ...                             "cweId": "CWE-125",
        ...                             "type": "CWE"
        ...                         }
        ...                     ]
        ...                 }
        ...             ]
        ...         }
        ...     }
        ... }
        >>> mock_cve_data2 = {
        ...     "data_type": "CVE",
        ...     "data_format": "MITRE",
        ...     "data_version": "4.0",
        ...     "generator": {
        ...         "engine": "Vulnogram 0.0.9"
        ...     },
        ...     "CVE_data_meta": {
        ...         "ID": "CVE-2022-28614",
        ...         "ASSIGNER": "[email protected]",
        ...         "TITLE": "read beyond bounds via ap_rwrite() ",
        ...         "STATE": "PUBLIC"
        ...     },
        ...     "problemtype": {
        ...         "problemtype_data": [
        ...             {
        ...                 "description": [
        ...                     {
        ...                         "lang": "eng",
        ...                         "value": "CWE-190 Integer Overflow or Wraparound"
        ...                     }
        ...                 ]
        ...             },
        ...             {
        ...                 "description": [
        ...                     {
        ...                         "lang": "eng",
        ...                         "value": "CWE-200 Exposure of Sensitive Information to an Unauthorized Actor"
        ...                     }
        ...                 ]
        ...             }
        ...         ]
        ...     }
        ... }
        >>> mock_cve_data3 = {
        ...     "containers": {
        ...         "cna": {
        ...             "problemTypes": [
        ...                 {"descriptions": [{"cweId": "CWE-125", "type": "CWE"}]},
        ...                 {"descriptions": [{"cweId": "CWE-476", "type": "CWE"}]}
        ...             ]
        ...         }
        ...     }
        ... }

        >>> get_weaknesses(mock_cve_data1)
        [125]

        >>> get_weaknesses(mock_cve_data2)
        [190, 200]

        >>> get_weaknesses(mock_cve_data3)
        [125, 476]
    """
    # The presence of a CVE_data_meta ID is what selects the 4.0-style layout.
    alias = get_item(cve_data, "CVE_data_meta", "ID")
    cwe_strings = []
    if alias:
        problemtype_data = get_item(cve_data, "problemtype", "problemtype_data") or []
        for problem in problemtype_data:
            # ``or`` fallbacks guard against keys that are present but null.
            for desc in problem.get("description") or []:
                value = desc.get("value") or ""
                cwe_strings.extend(re.findall(cwe_regex, value))
    else:
        containers = cve_data.get("containers") or {}
        cna = containers.get("cna") or {}
        # Walk every problemTypes entry: a record may list several of them,
        # each carrying its own descriptions.
        for problem_type in cna.get("problemTypes") or []:
            for description in problem_type.get("descriptions") or []:
                cwe_id_string = description.get("cweId")
                # Descriptions without a cweId contribute nothing.
                if cwe_id_string:
                    cwe_strings.append(cwe_id_string)

    return create_weaknesses_list(cwe_strings)
23 changes: 23 additions & 0 deletions vulnerabilities/importers/debian.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@
#

import logging
import re
from typing import Any
from typing import Iterable
from typing import List
from typing import Mapping

import requests
from cwe2.database import Database
from packageurl import PackageURL
from univers.version_range import DebianVersionRange
from univers.versions import DebianVersion
Expand All @@ -22,6 +24,7 @@
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import Importer
from vulnerabilities.importer import Reference
from vulnerabilities.utils import create_weaknesses_list
from vulnerabilities.utils import dedupe
from vulnerabilities.utils import get_item

Expand Down Expand Up @@ -93,6 +96,7 @@ def advisory_data(self) -> Iterable[AdvisoryData]:
yield from self.parse(pkg_name, records)

def parse(self, pkg_name: str, records: Mapping[str, Any]) -> Iterable[AdvisoryData]:

for cve_id, record in records.items():
affected_versions = []
fixed_versions = []
Expand Down Expand Up @@ -150,10 +154,29 @@ def parse(self, pkg_name: str, records: Mapping[str, Any]) -> Iterable[AdvisoryD
fixed_version=DebianVersion(fixed_version),
)
)
weaknesses = get_cwe_from_debian_advisory(record)

yield AdvisoryData(
aliases=[cve_id],
summary=record.get("description", ""),
affected_packages=affected_packages,
references=references,
weaknesses=weaknesses,
url=self.api_url,
)


def get_cwe_from_debian_advisory(record):
    """
    Return the list of CWE IDs mentioned in the "description" of ``record``.

    Every ``CWE-<digits>`` occurrence in the description text is collected and
    handed to ``create_weaknesses_list``. A missing or empty description is
    treated as an empty string.

    >>> get_cwe_from_debian_advisory({"description":"PEAR HTML_QuickForm version 3.2.14 contains an eval injection (CWE-95) vulnerability in HTML_QuickForm's getSubmitValue method, HTML_QuickForm's validate method, HTML_QuickForm_hierselect's _setOptions method, HTML_QuickForm_element's _findValue method, HTML_QuickForm_element's _prepareValue method. that can result in Possible information disclosure, possible impact on data integrity and execution of arbitrary code. This attack appear to be exploitable via A specially crafted query string could be utilised, e.g. http://www.example.com/admin/add_practice_type_id[1]=fubar%27])%20OR%20die(%27OOK!%27);%20//&mode=live. This vulnerability appears to have been fixed in 3.2.15."})
    [95]
    >>> get_cwe_from_debian_advisory({"description":"There is no WEAKNESS DATA"})
    []
    """
    text = record.get("description")
    if not text:
        # Covers both an absent key and a null/empty description.
        text = ""
    return create_weaknesses_list(re.findall(r"CWE-\d+", text))
24 changes: 24 additions & 0 deletions vulnerabilities/importers/fireeye.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from vulnerabilities.importer import Importer
from vulnerabilities.importer import Reference
from vulnerabilities.utils import build_description
from vulnerabilities.utils import create_weaknesses_list
from vulnerabilities.utils import cwe_regex
from vulnerabilities.utils import dedupe

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -77,10 +79,13 @@ def parse_advisory_data(raw_data, file, base_path) -> AdvisoryData:
disc_credits = md_dict.get("## Discovery Credits") # not used
disc_timeline = md_dict.get("## Disclosure Timeline") # not used
references = md_dict.get("## References") or []
cwe_data = md_dict.get("## Common Weakness Enumeration") or []

return AdvisoryData(
aliases=get_aliases(database_id, cve_ref),
summary=build_description(" ".join(summary), " ".join(description)),
references=get_references(references),
weaknesses=get_weaknesses(cwe_data),
url=advisory_url,
)

Expand Down Expand Up @@ -140,3 +145,22 @@ def md_list_to_dict(md_list):
else:
md_dict[md_key].append(md_line)
return md_dict


def get_weaknesses(cwe_data):
    """
    Return the CWE IDs found in ``cwe_data``, a list of weakness summary lines.

    Each line is scanned with ``cwe_regex``; all matches, in line order, are
    passed to ``create_weaknesses_list`` whose result is returned.

    >>> get_weaknesses([
    ... "CWE-379: Creation of Temporary File in Directory with Insecure Permissions",
    ... "CWE-362: Concurrent Execution using Shared Resource with Improper Synchronization ('Race Condition')"
    ... ])
    [379, 362]
    """
    matched_cwe_strings = [
        cwe_string
        for summary_line in cwe_data
        for cwe_string in re.findall(cwe_regex, summary_line)
    ]
    return create_weaknesses_list(matched_cwe_strings)
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,6 @@
}
],
"date_published": null,
"weaknesses": [],
"weaknesses": [476],
"url": "https://httpd.apache.org/security/json/CVE-2021-44224.json"
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,6 @@
}
],
"date_published": null,
"weaknesses": [],
"weaknesses": [190, 200],
"url": "https://httpd.apache.org/security/json/CVE-2022-28614.json"
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
]
}
],
"weaknesses": []
"weaknesses": [476]
},
{
"vulnerability_id": null,
Expand Down Expand Up @@ -103,6 +103,6 @@
]
}
],
"weaknesses": []
"weaknesses": [476]
}
]
38 changes: 37 additions & 1 deletion vulnerabilities/tests/test_debian.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
import os
import re
from unittest.mock import patch

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importers.debian import DebianImporter
from vulnerabilities.importers.debian import get_cwe_from_debian_advisory
from vulnerabilities.improvers.default import DefaultImprover
from vulnerabilities.improvers.valid_versions import DebianBasicImprover
from vulnerabilities.tests import util_tests
Expand Down Expand Up @@ -55,3 +56,38 @@ def test_debian_improver(mock_response):
result.extend(inference)
expected_file = os.path.join(TEST_DATA, f"debian-improver-expected.json")
util_tests.check_results_against_json(result, expected_file)


def test_get_cwe_from_debian_advisories():
    """Check that the CWE named in a record's description is extracted."""
    description = "Legion of the Bouncy Castle Legion of the Bouncy Castle Java Cryptography APIs 1.58 up to but not including 1.60 contains a CWE-580: Use of Externally-Controlled Input to Select Classes or Code ('Unsafe Reflection') vulnerability in XMSS/XMSS^MT private key deserialization that can result in Deserializing an XMSS/XMSS^MT private key can result in the execution of unexpected code. This attack appear to be exploitable via A handcrafted private key can include references to unexpected classes which will be picked up from the class path for the executing application. This vulnerability appears to have been fixed in 1.60 and later."

    # Every release entry has the same shape; only the repository version differs.
    repository_versions = {
        "bookworm": "1.72-2",
        "bullseye": "1.68-2",
        "sid": "1.77-1",
        "trixie": "1.77-1",
    }
    releases = {
        suite: {
            "status": "resolved",
            "repositories": {suite: version},
            "fixed_version": "1.60-1",
            "urgency": "low",
        }
        for suite, version in repository_versions.items()
    }
    advisory_record = {
        "description": description,
        "scope": "local",
        "releases": releases,
    }

    assert get_cwe_from_debian_advisory(advisory_record) == [580]
17 changes: 17 additions & 0 deletions vulnerabilities/tests/test_fireeye.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from vulnerabilities.importer import Reference
from vulnerabilities.importers.fireeye import get_aliases
from vulnerabilities.importers.fireeye import get_references
from vulnerabilities.importers.fireeye import get_weaknesses
from vulnerabilities.importers.fireeye import md_list_to_dict
from vulnerabilities.importers.fireeye import parse_advisory_data
from vulnerabilities.tests import util_tests
Expand Down Expand Up @@ -217,3 +218,19 @@ def test_md_list_to_dict_2(self):
md_list = f.readlines()
md_dict = md_list_to_dict(md_list)
assert md_dict == expected_output

def test_get_weaknesses(self):
    """Check CWE extraction for recognised IDs and for an ID expected to be rejected."""
    recognised_lines = [
        "CWE-379: Creation of Temporary File in Directory with Insecure Permissions",
        "CWE-362: Concurrent Execution using Shared Resource with Improper Synchronization ('Race Condition')",
    ]
    assert get_weaknesses(recognised_lines) == [379, 362]

    # The sample text itself states the expectation: an unknown CWE id yields an empty list.
    unrecognised_lines = [
        "CWE-2345: This cwe id does not exist so it should generate Invalid CWE id error and return empty list."
    ]
    assert get_weaknesses(unrecognised_lines) == []
Loading