Skip to content

Commit

Permalink
refactor sync functions into separate files
Browse files Browse the repository at this point in the history
Signed-off-by: Hans Wernetti <[email protected]>
  • Loading branch information
hanzo committed Oct 29, 2024
1 parent db7b784 commit 13db598
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 63 deletions.
16 changes: 14 additions & 2 deletions cartography/intel/semgrep/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
import neo4j

from cartography.config import Config
from cartography.intel.semgrep.findings import sync
from cartography.intel.semgrep.dependencies import sync_dependencies
from cartography.intel.semgrep.deployment import get_deployment
from cartography.intel.semgrep.deployment import load_semgrep_deployment
from cartography.intel.semgrep.findings import sync_findings
from cartography.util import timeit


Expand All @@ -20,4 +23,13 @@ def start_semgrep_ingestion(
if not config.semgrep_app_token:
logger.info('Semgrep import is not configured - skipping this module. See docs to configure.')
return
sync(neo4j_session, config.semgrep_app_token, config.update_tag, common_job_parameters)

# fetch and load the Semgrep deployment
semgrep_deployment = get_deployment(config.semgrep_app_token)
deployment_id = semgrep_deployment["id"]
deployment_slug = semgrep_deployment["slug"]
load_semgrep_deployment(neo4j_session, semgrep_deployment, config.update_tag)
common_job_parameters["DEPLOYMENT_ID"] = deployment_id

sync_dependencies(neo4j_session, config.semgrep_app_token, config.update_tag, common_job_parameters)
sync_findings(neo4j_session, config.semgrep_app_token, config.update_tag, common_job_parameters, deployment_slug)
48 changes: 48 additions & 0 deletions cartography/intel/semgrep/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
from requests.exceptions import ReadTimeout

from cartography.client.core.tx import load
from cartography.graph.job import GraphJob
from cartography.models.semgrep.dependencies import SemgrepGoLibrarySchema
from cartography.stats import get_stats_client
from cartography.util import merge_module_sync_metadata
from cartography.util import timeit

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -146,3 +149,48 @@ def load_dependencies(
lastupdated=update_tag,
DEPLOYMENT_ID=deployment_id,
)


@timeit
def cleanup(
    neo4j_session: neo4j.Session,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Runs the cleanup job derived from the Semgrep Go library node schema.
    param: neo4j_session: The Neo4j session the cleanup job is run against.
    param: common_job_parameters: Parameters passed to GraphJob.from_node_schema.
    """
    logger.info("Running Semgrep Go Library cleanup job.")
    go_library_schema = SemgrepGoLibrarySchema()
    GraphJob.from_node_schema(go_library_schema, common_job_parameters).run(neo4j_session)


@timeit
def sync_dependencies(
    neo4j_session: neo4j.Session,
    semgrep_app_token: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Fetches, transforms and loads Semgrep Go ("gomod") dependencies, then runs
    the dependency cleanup job and records sync metadata.
    param: neo4j_session: The Neo4j session used for loading and cleanup.
    param: semgrep_app_token: The Semgrep App token passed to get_dependencies.
    param: update_tag: The update tag stamped on loaded data and sync metadata.
    param: common_job_parameters: Must carry a truthy "DEPLOYMENT_ID"; if it
    does not, a warning is logged and nothing is synced.
    """
    deployment_id = common_job_parameters.get("DEPLOYMENT_ID")
    if not deployment_id:
        logger.warning("Missing Semgrep deployment ID. Skipping Semgrep dependencies sync job.")
        return

    logger.info("Running Semgrep dependencies sync job.")

    # Only the Go ecosystem is fetched and loaded here.
    go_dependencies = transform_dependencies(
        get_dependencies(semgrep_app_token, deployment_id, ecosystems=["gomod"]),
    )
    load_dependencies(
        neo4j_session,
        SemgrepGoLibrarySchema,
        go_dependencies,
        deployment_id,
        update_tag,
    )

    cleanup(neo4j_session, common_job_parameters)

    # NOTE(review): stat_handler is not defined in the visible part of this
    # module -- confirm a module-level `stat_handler = get_stats_client(__name__)`
    # exists, otherwise this call raises NameError.
    merge_module_sync_metadata(
        neo4j_session=neo4j_session,
        group_type='Semgrep',
        group_id=deployment_id,
        synced_type='Dependency',  # TODO: should this be "SemgrepDependency"?
        update_tag=update_tag,
        stat_handler=stat_handler,
    )
51 changes: 51 additions & 0 deletions cartography/intel/semgrep/deployment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import logging
from typing import Any
from typing import Dict

import neo4j
import requests

from cartography.client.core.tx import load
from cartography.models.semgrep.deployment import SemgrepDeploymentSchema
from cartography.stats import get_stats_client
from cartography.util import timeit

logger = logging.getLogger(__name__)
stat_handler = get_stats_client(__name__)
_TIMEOUT = (60, 60)


@timeit
def get_deployment(semgrep_app_token: str) -> Dict[str, Any]:
    """
    Fetches the first deployment listed by the Semgrep deployments API.
    param: semgrep_app_token: The Semgrep App token sent as a Bearer credential.
    return: A dict holding the "id", "name" and "slug" of that deployment.
    """
    response = requests.get(
        "https://semgrep.dev/api/v1/deployments",
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {semgrep_app_token}",
        },
        timeout=_TIMEOUT,
    )
    # Surface HTTP error responses instead of parsing an error body.
    response.raise_for_status()

    # Only the first entry of the "deployments" list is used.
    first_deployment = response.json()["deployments"][0]
    return {field: first_deployment[field] for field in ("id", "name", "slug")}


@timeit
def load_semgrep_deployment(
    neo4j_session: neo4j.Session,
    deployment: Dict[str, Any],
    update_tag: int,
) -> None:
    """
    Loads a single Semgrep deployment into the graph using SemgrepDeploymentSchema.
    param: neo4j_session: The Neo4j session used for the load.
    param: deployment: The deployment dict to load (it is also written to the log).
    param: update_tag: Passed to load() as `lastupdated`.
    """
    logger.info(f"Loading Semgrep deployment info {deployment} into the graph...")
    # load() takes a list of records, so the single deployment is wrapped.
    deployment_rows = [deployment]
    load(neo4j_session, SemgrepDeploymentSchema(), deployment_rows, lastupdated=update_tag)
72 changes: 14 additions & 58 deletions cartography/intel/semgrep/findings.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,6 @@

from cartography.client.core.tx import load
from cartography.graph.job import GraphJob
from cartography.intel.semgrep.dependencies import get_dependencies
from cartography.intel.semgrep.dependencies import load_dependencies
from cartography.intel.semgrep.dependencies import transform_dependencies
from cartography.models.semgrep.dependencies import SemgrepGoLibrarySchema
from cartography.models.semgrep.deployment import SemgrepDeploymentSchema
from cartography.models.semgrep.findings import SemgrepSCAFindingSchema
from cartography.models.semgrep.locations import SemgrepSCALocationSchema
from cartography.stats import get_stats_client
Expand All @@ -30,29 +25,6 @@
_MAX_RETRIES = 3


@timeit
def get_deployment(semgrep_app_token: str) -> Dict[str, Any]:
    """
    Fetches the first deployment listed by the Semgrep deployments API.
    param: semgrep_app_token: The Semgrep App token sent as a Bearer credential.
    return: A dict holding the "id", "name" and "slug" of that deployment.
    """
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {semgrep_app_token}",
    }
    response = requests.get(
        "https://semgrep.dev/api/v1/deployments",
        headers=request_headers,
        timeout=_TIMEOUT,
    )
    # Surface HTTP error responses instead of parsing an error body.
    response.raise_for_status()

    # Only the first entry of the "deployments" list is used.
    first_deployment = response.json()["deployments"][0]
    return {
        "id": first_deployment["id"],
        "name": first_deployment["name"],
        "slug": first_deployment["slug"],
    }


@timeit
def get_sca_vulns(semgrep_app_token: str, deployment_slug: str) -> List[Dict[str, Any]]:
"""
Expand Down Expand Up @@ -205,19 +177,6 @@ def transform_sca_vulns(raw_vulns: List[Dict[str, Any]]) -> Tuple[List[Dict[str,
return vulns, usages


@timeit
def load_semgrep_deployment(
    neo4j_session: neo4j.Session,
    deployment: Dict[str, Any],
    update_tag: int,
) -> None:
    """
    Loads a single Semgrep deployment into the graph using SemgrepDeploymentSchema.
    param: neo4j_session: The Neo4j session used for the load.
    param: deployment: The deployment dict to load (it is also written to the log).
    param: update_tag: Passed to load() as `lastupdated`.
    """
    logger.info(f"Loading Semgrep deployment info {deployment} into the graph...")
    # load() takes a list of records, so the single deployment is wrapped.
    records = [deployment]
    load(neo4j_session, SemgrepDeploymentSchema(), records, lastupdated=update_tag)


@timeit
def load_semgrep_sca_vulns(
neo4j_session: neo4j.Session,
Expand Down Expand Up @@ -269,32 +228,29 @@ def cleanup(


@timeit
def sync(
neo4j_sesion: neo4j.Session,
def sync_findings(
neo4j_session: neo4j.Session,
semgrep_app_token: str,
update_tag: int,
common_job_parameters: Dict[str, Any],
deployment_slug: str,
) -> None:

deployment_id = common_job_parameters.get("DEPLOYMENT_ID")
if not deployment_id or not deployment_slug:
logger.warning("Missing Semgrep deployment ID or slug. Skipping SCA findings sync job.")
return

logger.info("Running Semgrep SCA findings sync job.")
semgrep_deployment = get_deployment(semgrep_app_token)
deployment_id = semgrep_deployment["id"]
deployment_slug = semgrep_deployment["slug"]
load_semgrep_deployment(neo4j_sesion, semgrep_deployment, update_tag)
common_job_parameters["DEPLOYMENT_ID"] = deployment_id
raw_vulns = get_sca_vulns(semgrep_app_token, deployment_slug)
vulns, usages = transform_sca_vulns(raw_vulns)
load_semgrep_sca_vulns(neo4j_sesion, vulns, deployment_id, update_tag)
load_semgrep_sca_usages(neo4j_sesion, usages, deployment_id, update_tag)
run_scoped_analysis_job('semgrep_sca_risk_analysis.json', neo4j_sesion, common_job_parameters)

# fetch and load dependencies for the Go ecosystem
raw_deps = get_dependencies(semgrep_app_token, deployment_id, ecosystems=["gomod"])
deps = transform_dependencies(raw_deps)
load_dependencies(neo4j_sesion, SemgrepGoLibrarySchema, deps, deployment_id, update_tag)
load_semgrep_sca_vulns(neo4j_session, vulns, deployment_id, update_tag)
load_semgrep_sca_usages(neo4j_session, usages, deployment_id, update_tag)
run_scoped_analysis_job('semgrep_sca_risk_analysis.json', neo4j_session, common_job_parameters)

cleanup(neo4j_sesion, common_job_parameters)
cleanup(neo4j_session, common_job_parameters)
merge_module_sync_metadata(
neo4j_session=neo4j_sesion,
neo4j_session=neo4j_session,
group_type='Semgrep',
group_id=deployment_id,
synced_type='SCA',
Expand Down
6 changes: 3 additions & 3 deletions tests/integration/cartography/intel/semgrep/test_findings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import cartography.intel.semgrep.findings
import tests.data.semgrep.sca
from cartography.intel.semgrep.findings import sync
from cartography.intel.semgrep.findings import sync_findings
from tests.integration.util import check_nodes
from tests.integration.util import check_rels

Expand Down Expand Up @@ -87,7 +87,7 @@ def _create_cve_nodes(neo4j_session):
"get_sca_vulns",
return_value=tests.data.semgrep.sca.RAW_VULNS,
)
def test_sync(mock_get_sca_vulns, mock_get_deployment, neo4j_session):
def test_sync_findings(mock_get_sca_vulns, mock_get_deployment, neo4j_session):
# Arrange
_create_github_repos(neo4j_session)
_create_dependency_nodes(neo4j_session)
Expand All @@ -98,7 +98,7 @@ def test_sync(mock_get_sca_vulns, mock_get_deployment, neo4j_session):
}

# Act
sync(neo4j_session, semgrep_app_token, TEST_UPDATE_TAG, common_job_parameters)
sync_findings(neo4j_session, semgrep_app_token, TEST_UPDATE_TAG, common_job_parameters)

# Assert

Expand Down

0 comments on commit 13db598

Please sign in to comment.