diff --git a/prospector/commit_processor/feature_extractor.py b/prospector/commit_processor/feature_extractor.py index 3f7d505df..a3a5b8c06 100644 --- a/prospector/commit_processor/feature_extractor.py +++ b/prospector/commit_processor/feature_extractor.py @@ -20,6 +20,7 @@ def extract_features(commit: Commit, advisory_record: AdvisoryRecord) -> CommitF references_ghissue = extract_references_ghissue(commit.ghissue_refs) n_changed_files = extract_n_changed_files(commit.changed_files) contains_jira_reference = extract_contains_jira_reference(commit.jira_refs) + vulnerability_timestamp = extract_vuln_timestamp(advisory_record) commit_feature = CommitFeatures( commit=commit, references_vuln_id=references_vuln_id, @@ -30,6 +31,7 @@ def extract_features(commit: Commit, advisory_record: AdvisoryRecord) -> CommitF references_ghissue=references_ghissue, n_changed_files=n_changed_files, contains_jira_reference=contains_jira_reference, + vulnerability_timestamp=vulnerability_timestamp, ) return commit_feature @@ -54,6 +56,10 @@ def extract_changes_relevant_path( return any([changed_path in relevant_paths for changed_path in changed_paths]) +def extract_vuln_timestamp(advisory_record: AdvisoryRecord) -> int: + return advisory_record.published_timestamp + + def extract_avg_hunk_size(hunks: "list[tuple[int]]") -> int: n_hunks = len(hunks) diff --git a/prospector/commit_processor/test_feature_extractor.py b/prospector/commit_processor/test_feature_extractor.py index 6dfe09c7e..809e1d8ab 100644 --- a/prospector/commit_processor/test_feature_extractor.py +++ b/prospector/commit_processor/test_feature_extractor.py @@ -14,6 +14,7 @@ extract_references_ghissue, extract_references_vuln_id, extract_time_between_commit_and_advisory_record, + extract_vuln_timestamp, ) from .preprocessor import preprocess_commit @@ -60,6 +61,18 @@ def test_time_between_commit_and_advisory_record(): assert extract_time_between_commit_and_advisory_record(142, 100) == 42 +def test_extract_vuln_timestamp(): + + advisory_record = AdvisoryRecord( + vulnerability_id="TEST", + repository_url="https://github.com/apache/struts", + published_timestamp=100, + paths=["pom.xml"], + ) + + assert extract_vuln_timestamp(advisory_record) == 100 + + def test_extract_changes_relevant_path(): path_1 = "a/b.py" path_2 = "a/c.py" diff --git a/prospector/datamodel/commit_features.py b/prospector/datamodel/commit_features.py index d33d9899f..3d6023be4 100644 --- a/prospector/datamodel/commit_features.py +++ b/prospector/datamodel/commit_features.py @@ -13,3 +13,4 @@ class CommitFeatures(BaseModel): n_hunks: int = 0 n_changed_files: int = 0 contains_jira_reference: bool = False + vulnerability_timestamp: int = 0 diff --git a/prospector/datamodel/commit_features_test.py b/prospector/datamodel/commit_features_test.py index 49d00ddb8..709a0cc9b 100644 --- a/prospector/datamodel/commit_features_test.py +++ b/prospector/datamodel/commit_features_test.py @@ -18,6 +18,7 @@ def test_simple(): references_ghissue=True, n_changed_files=44, contains_jira_reference=True, + vulnerability_timestamp=100, ) assert commit_features.commit.repository == "https://github.com/abc/xyz" @@ -29,3 +30,4 @@ def test_simple(): assert commit_features.references_ghissue assert commit_features.n_changed_files == 44 assert commit_features.contains_jira_reference + assert commit_features.vulnerability_timestamp == 100