From c2e17850e0e708bdb75260a3c39fd0af111798ac Mon Sep 17 00:00:00 2001 From: Donny Peeters <46660228+Donnype@users.noreply.github.com> Date: Tue, 6 Feb 2024 21:46:02 +0100 Subject: [PATCH] Allow creation of declared scan profiles through normalizers (#2428) Signed-off-by: Donny Peeters Co-authored-by: Jan Klopper --- boefjes/boefjes/app.py | 8 +- boefjes/boefjes/config.py | 8 +- boefjes/boefjes/job_handler.py | 72 +++++++++++---- boefjes/boefjes/job_models.py | 8 +- boefjes/boefjes/local.py | 8 +- .../boefjes/plugins/kat_external_db/main.py | 2 +- .../plugins/kat_external_db/normalize.py | 13 ++- boefjes/tests/examples/external_db.json | 25 ++++++ boefjes/tests/test_scan_profiles.py | 87 +++++++++++++++++++ boefjes/tests/test_tasks.py | 2 +- boefjes/tools/run_boefje.py | 4 +- boefjes/tools/run_normalizer.py | 6 +- 12 files changed, 212 insertions(+), 31 deletions(-) create mode 100644 boefjes/tests/examples/external_db.json create mode 100644 boefjes/tests/test_scan_profiles.py diff --git a/boefjes/boefjes/app.py b/boefjes/boefjes/app.py index f228a590e51..bc5e8e2abb3 100644 --- a/boefjes/boefjes/app.py +++ b/boefjes/boefjes/app.py @@ -16,7 +16,7 @@ TaskStatus, ) from boefjes.config import Settings -from boefjes.job_handler import BoefjeHandler, NormalizerHandler +from boefjes.job_handler import BoefjeHandler, NormalizerHandler, bytes_api_client from boefjes.katalogus.local_repository import get_local_repository from boefjes.local import LocalBoefjeJobRunner, LocalNormalizerJobRunner from boefjes.runtime_interfaces import Handler, WorkerManager @@ -254,9 +254,11 @@ def _start_working( def get_runtime_manager(settings: Settings, queue: WorkerManager.Queue, log_level: str) -> WorkerManager: local_repository = get_local_repository() if queue is WorkerManager.Queue.BOEFJES: - item_handler = BoefjeHandler(LocalBoefjeJobRunner(local_repository), local_repository) + item_handler = BoefjeHandler(LocalBoefjeJobRunner(local_repository), local_repository, bytes_api_client) else: - item_handler = NormalizerHandler(LocalNormalizerJobRunner(local_repository)) + item_handler = NormalizerHandler( + LocalNormalizerJobRunner(local_repository), bytes_api_client, settings.scan_profile_whitelist + ) return SchedulerWorkerManager( item_handler, diff --git a/boefjes/boefjes/config.py b/boefjes/boefjes/config.py index 13b2f4e8cec..73650508447 100644 --- a/boefjes/boefjes/config.py +++ b/boefjes/boefjes/config.py @@ -3,7 +3,7 @@ from pathlib import Path from typing import Any, Dict, Optional, Tuple, Type -from pydantic import AmqpDsn, AnyHttpUrl, Field, FilePath, IPvAnyAddress, PostgresDsn +from pydantic import AmqpDsn, AnyHttpUrl, Field, FilePath, IPvAnyAddress, PostgresDsn, conint from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict from pydantic_settings.sources import EnvSettingsSource @@ -52,6 +52,12 @@ class Settings(BaseSettings): "1.1.1.1", description="Name server used for remote DNS resolution in the boefje runner" ) + scan_profile_whitelist: Dict[str, conint(strict=True, ge=0, le=4)] = Field( + default_factory=dict, + description="Whitelist for normalizer ids allowed to produce scan profiles, including a maximum level.", + examples=['{"kat_external_db_normalize": 3, "kat_dns_normalize": 1}'], + ) + # Queue configuration queue_uri: AmqpDsn = Field(..., description="KAT queue URI", examples=["amqp://"], validation_alias="QUEUE_URI") diff --git a/boefjes/boefjes/job_handler.py b/boefjes/boefjes/job_handler.py index 4a56c6b25f5..9ca71016150 100644 --- a/boefjes/boefjes/job_handler.py +++ b/boefjes/boefjes/job_handler.py @@ -3,7 +3,7 @@ import traceback from datetime import datetime, timedelta, timezone from enum import Enum -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional import requests from pydantic.tools import parse_obj_as @@ -16,13 +16,14 @@ BoefjeMeta, NormalizerMeta, NormalizerPlainOOI, + NormalizerScanProfile, ) from boefjes.katalogus.local_repository import LocalPluginRepository from boefjes.plugins.models import _default_mime_types from boefjes.runtime_interfaces import BoefjeJobRunner, Handler, NormalizerJobRunner from octopoes.api.models import Declaration, Observation from octopoes.connector.octopoes import OctopoesAPIConnector -from octopoes.models import OOI, Reference +from octopoes.models import OOI, Reference, ScanProfile from octopoes.models.exception import ObjectNotFoundException from octopoes.models.types import OOIType @@ -81,6 +82,10 @@ def serialize_ooi(ooi: OOI): return serialized_oois +def get_octopoes_api_connector(org_code: str): + return OctopoesAPIConnector(str(settings.octopoes_api), org_code) + + def get_environment_settings(boefje_meta: BoefjeMeta, environment_keys: List[str]) -> Dict[str, str]: try: katalogus_api = str(settings.katalogus_api).rstrip("/") @@ -106,9 +111,17 @@ def get_environment_settings(boefje_meta: BoefjeMeta, environment_keys: List[str class BoefjeHandler(Handler): - def __init__(self, job_runner, local_repository: LocalPluginRepository): - self.job_runner: BoefjeJobRunner = job_runner - self.local_repository: LocalPluginRepository = local_repository + def __init__( + self, + job_runner: BoefjeJobRunner, + local_repository: LocalPluginRepository, + bytes_client: BytesAPIClient, + octopoes_factory=get_octopoes_api_connector, + ): + self.job_runner = job_runner + self.local_repository = local_repository + self.bytes_client = bytes_client + self.octopoes_factory = octopoes_factory def handle(self, boefje_meta: BoefjeMeta) -> None: logger.info("Handling boefje %s[task_id=%s]", boefje_meta.boefje.id, str(boefje_meta.id)) @@ -129,7 +142,7 @@ def handle(self, boefje_meta: BoefjeMeta) -> None: boefje_meta.arguments["input"] = serialize_ooi( _find_ooi_in_past( Reference.from_str(boefje_meta.input_ooi), - get_octopoes_api_connector(boefje_meta.organization), + self.octopoes_factory(boefje_meta.organization), ) ) @@ -157,11 +170,11 @@ def handle(self, boefje_meta: BoefjeMeta) -> None: boefje_meta.ended_at = datetime.now(timezone.utc) logger.info("Saving to Bytes for boefje %s[%s]", boefje_meta.boefje.id, str(boefje_meta.id)) - bytes_api_client.save_boefje_meta(boefje_meta) + self.bytes_client.save_boefje_meta(boefje_meta) if boefje_results: for boefje_added_mime_types, output in boefje_results: - raw_file_id = bytes_api_client.save_raw( + raw_file_id = self.bytes_client.save_raw( boefje_meta.id, output, mime_types.union(boefje_added_mime_types) ) logger.debug( @@ -172,19 +185,28 @@ def handle(self, boefje_meta: BoefjeMeta) -> None: class NormalizerHandler(Handler): - def __init__(self, job_runner): - self.job_runner: NormalizerJobRunner = job_runner + def __init__( + self, + job_runner: NormalizerJobRunner, + bytes_client: BytesAPIClient, + octopoes_factory=get_octopoes_api_connector, + whitelist: Optional[Dict[str, int]] = None, + ): + self.job_runner = job_runner + self.bytes_client: BytesAPIClient = bytes_client + self.octopoes_factory = octopoes_factory + self.whitelist = whitelist def handle(self, normalizer_meta: NormalizerMeta) -> None: logger.info("Handling normalizer %s[%s]", normalizer_meta.normalizer.id, normalizer_meta.id) - raw = bytes_api_client.get_raw(normalizer_meta.raw_data.id) + raw = self.bytes_client.get_raw(normalizer_meta.raw_data.id) normalizer_meta.started_at = datetime.now(timezone.utc) try: results = self.job_runner.run(normalizer_meta, raw) - connector = get_octopoes_api_connector(normalizer_meta.raw_data.boefje_meta.organization) + connector = self.octopoes_factory(normalizer_meta.raw_data.boefje_meta.organization) for observation in results.observations: reference = Reference.from_str(observation.input_ooi) @@ -207,9 +229,25 @@ def handle(self, normalizer_meta: NormalizerMeta) -> None: valid_time=normalizer_meta.raw_data.boefje_meta.ended_at, ) ) + + corrected_scan_profiles = [] + for profile in results.scan_profiles: + profile.level = min(profile.level, self.whitelist.get(normalizer_meta.normalizer.id, profile.level)) + corrected_scan_profiles.append(profile) + + validated_scan_profiles = [ + profile + for profile in corrected_scan_profiles + if self.whitelist and profile.level <= self.whitelist.get(normalizer_meta.normalizer.id, -1) + ] + if validated_scan_profiles: + connector.save_many_scan_profiles( + [self._parse_scan_profile(scan_profile) for scan_profile in results.scan_profiles], + valid_time=normalizer_meta.raw_data.boefje_meta.ended_at, + ) finally: normalizer_meta.ended_at = datetime.now(timezone.utc) - bytes_api_client.save_normalizer_meta(normalizer_meta) + self.bytes_client.save_normalizer_meta(normalizer_meta) logger.info("Done with normalizer %s[%s]", normalizer_meta.normalizer.id, normalizer_meta.id) @@ -217,6 +255,10 @@ def handle(self, normalizer_meta: NormalizerMeta) -> None: def _parse_ooi(result: NormalizerPlainOOI): return parse_obj_as(OOIType, result.model_dump()) + @staticmethod + def _parse_scan_profile(result: NormalizerScanProfile): + return parse_obj_as(ScanProfile, result.model_dump()) -def get_octopoes_api_connector(org_code: str): - return OctopoesAPIConnector(str(settings.octopoes_api), org_code) + +class InvalidWhitelist(Exception): + pass diff --git a/boefjes/boefjes/job_models.py b/boefjes/boefjes/job_models.py index 2fcbc9e10d4..a92b699edb4 100644 --- a/boefjes/boefjes/job_models.py +++ b/boefjes/boefjes/job_models.py @@ -96,10 +96,16 @@ class NormalizerDeclaration(BaseModel): ooi: NormalizerPlainOOI +class NormalizerScanProfile(BaseModel): + scan_profile_type: str + model_config = ConfigDict(populate_by_name=True, extra="allow") + + class NormalizerResult(BaseModel): # Moves all validation logic to Pydantic - item: Union[NormalizerPlainOOI, NormalizerObservation, NormalizerDeclaration] + item: Union[NormalizerPlainOOI, NormalizerObservation, NormalizerDeclaration, NormalizerScanProfile] class NormalizerOutput(BaseModel): observations: List[NormalizerObservation] = [] declarations: List[NormalizerDeclaration] = [] + scan_profiles: List[NormalizerScanProfile] = [] diff --git a/boefjes/boefjes/local.py b/boefjes/boefjes/local.py index 5291c0b456f..5dcc44c89c2 100644 --- a/boefjes/boefjes/local.py +++ b/boefjes/boefjes/local.py @@ -13,12 +13,13 @@ NormalizerOutput, NormalizerPlainOOI, NormalizerResult, + NormalizerScanProfile, ObservationsWithoutInputOOI, UnsupportedReturnTypeNormalizer, ) from boefjes.katalogus.local_repository import LocalPluginRepository from boefjes.runtime_interfaces import BoefjeJobRunner, JobRuntimeError, NormalizerJobRunner -from octopoes.models import OOI +from octopoes.models import OOI, DeclaredScanProfile logger = logging.getLogger(__name__) @@ -98,12 +99,13 @@ def _parse_results(self, normalizer_meta: NormalizerMeta, results: List[Any]) -> return NormalizerOutput( observations=observations, declarations=[result.item for result in parsed if isinstance(result.item, NormalizerDeclaration)], + scan_profiles=[result.item for result in parsed if isinstance(result.item, NormalizerScanProfile)], ) @staticmethod def _parse(result: Any) -> NormalizerResult: - if not isinstance(result, dict): # Must be an OOI. This should be phased out together with Octopoes - if not isinstance(result, OOI): + if not isinstance(result, dict): # Must be an OOI or ScanProfile. Should be phased out with Octopoes dependency + if not isinstance(result, (OOI, DeclaredScanProfile)): raise UnsupportedReturnTypeNormalizer(str(type(result))) result = result.dict() diff --git a/boefjes/boefjes/plugins/kat_external_db/main.py b/boefjes/boefjes/plugins/kat_external_db/main.py index a56481990b1..7b602f0a581 100644 --- a/boefjes/boefjes/plugins/kat_external_db/main.py +++ b/boefjes/boefjes/plugins/kat_external_db/main.py @@ -1,4 +1,4 @@ -"""Boefje script for getting domaions and ipaddresses from dadb""" +"""Boefje script for getting domains and ipaddresses from dadb""" from os import getenv from typing import List, Tuple, Union diff --git a/boefjes/boefjes/plugins/kat_external_db/normalize.py b/boefjes/boefjes/plugins/kat_external_db/normalize.py index 8e6dbb424f0..ccb26c2c90d 100644 --- a/boefjes/boefjes/plugins/kat_external_db/normalize.py +++ b/boefjes/boefjes/plugins/kat_external_db/normalize.py @@ -4,7 +4,7 @@ from typing import Iterator, Union from boefjes.job_models import NormalizerMeta -from octopoes.models import OOI +from octopoes.models import OOI, DeclaredScanProfile from octopoes.models.ooi.dns.zone import Hostname from octopoes.models.ooi.network import IPAddressV4, IPAddressV6, IPV4NetBlock, IPV6NetBlock, Network @@ -50,18 +50,25 @@ def run(normalizer_meta: NormalizerMeta, raw: Union[bytes, str]) -> Iterator[OOI ip_address = address_type(address=address, network=network.reference) yield ip_address + yield DeclaredScanProfile(reference=ip_address.reference, level=3) addresses_count += 1 if mask < interface.ip.max_prefixlen: - yield block_type( + block = block_type( start_ip=ip_address.reference, mask=mask, network=network.reference, ) + yield block + yield DeclaredScanProfile(reference=block.reference, level=3) blocks_count += 1 for hostname in follow_path_in_dict(path=DOMAIN_LIST_PATH, path_dict=results): - yield Hostname(name=follow_path_in_dict(path=DOMAIN_ITEM_PATH, path_dict=hostname), network=network.reference) + hostname = Hostname( + name=follow_path_in_dict(path=DOMAIN_ITEM_PATH, path_dict=hostname), network=network.reference + ) + yield hostname + yield DeclaredScanProfile(reference=hostname.reference, level=3) hostnames_count += 1 logging.info( diff --git a/boefjes/tests/examples/external_db.json b/boefjes/tests/examples/external_db.json new file mode 100644 index 00000000000..aa714587517 --- /dev/null +++ b/boefjes/tests/examples/external_db.json @@ -0,0 +1,25 @@ +{ + "id": "f50de284-d3c1-4f87-ba68-07b957b7a48f", + "raw_data": { + "id": "66451567-2381-4248-b5a1-d0d0985e065f", + "boefje_meta": { + "id": "f29f76c5-f7b1-4662-89c6-6dc313a9f93f", + "boefje": { + "id": "external_db" + }, + "input_ooi": "Network|internet", + "arguments": {"input": {"name": "internet"}}, + "organization": "tst", + "started_at": "2010-07-27T11:26:42.679000+00:00", + "ended_at": "2010-07-27T11:26:48.679000+00:00" + }, + "mime_types": [ + { + "value": "external_db" + } + ] + }, + "normalizer": { + "id": "kat_external_db_normalize" + } +} diff --git a/boefjes/tests/test_scan_profiles.py b/boefjes/tests/test_scan_profiles.py new file mode 100644 index 00000000000..366c5296ce3 --- /dev/null +++ b/boefjes/tests/test_scan_profiles.py @@ -0,0 +1,87 @@ +import json +import os +from pathlib import Path +from unittest import TestCase, mock + +import pytest +from pydantic import ValidationError + +from boefjes.config import Settings +from boefjes.job_handler import NormalizerHandler +from boefjes.job_models import NormalizerMeta, NormalizerScanProfile +from boefjes.katalogus.local_repository import LocalPluginRepository +from boefjes.local import LocalNormalizerJobRunner +from tests.loading import get_dummy_data + + +class ScanProfileTest(TestCase): + def test_normalizer_can_yield_scan_profiles(self): + local_repository = LocalPluginRepository(Path(__file__).parent.parent / "boefjes" / "plugins") + runner = LocalNormalizerJobRunner(local_repository) + meta = NormalizerMeta.model_validate_json(get_dummy_data("external_db.json")) + + raw = json.dumps( + { + "ip_addresses": [{"ip_address": "127.0.0.1"}, {"ip_address": "10.0.0.0"}], + "domains": [{"domain": "example.com"}], + } + ) + output = runner.run(meta, bytes(raw, "UTF-8")) + + self.assertEqual(1, len(output.observations)) + self.assertEqual(3, len(output.observations[0].results)) + self.assertEqual(3, len(output.scan_profiles)) + + profile = output.scan_profiles[0] + self.assertIsInstance(profile, NormalizerScanProfile) + self.assertEqual("IPAddressV4|internet|127.0.0.1", profile.reference) + self.assertEqual(3, profile.level) + + profile = output.scan_profiles[1] + self.assertIsInstance(profile, NormalizerScanProfile) + self.assertEqual("IPAddressV4|internet|10.0.0.0", profile.reference) + self.assertEqual(3, profile.level) + + profile = output.scan_profiles[2] + self.assertIsInstance(profile, NormalizerScanProfile) + self.assertEqual("Hostname|internet|example.com", profile.reference) + self.assertEqual(3, profile.level) + + def test_job_handler_respects_whitelist(self): + raw = { + "ip_addresses": [{"ip_address": "127.0.0.1"}, {"ip_address": "10.0.0.0"}], + "domains": [{"domain": "example.com"}], + } + bytes_mock = mock.Mock() + bytes_mock.get_raw.return_value = json.dumps(raw) + octopoes = mock.Mock() + + local_repository = LocalPluginRepository(Path(__file__).parent.parent / "boefjes" / "plugins") + runner = LocalNormalizerJobRunner(local_repository) + meta = NormalizerMeta.model_validate_json(get_dummy_data("external_db.json")) + + os.environ["BOEFJES_SCAN_PROFILE_WHITELIST"] = '{"x": 5}' + with pytest.raises(ValidationError): + Settings() + + os.environ["BOEFJES_SCAN_PROFILE_WHITELIST"] = '{"x": -1}' + with pytest.raises(ValidationError): + Settings() + + os.environ["BOEFJES_SCAN_PROFILE_WHITELIST"] = '{"x": 3}' + NormalizerHandler(runner, bytes_mock, lambda x: octopoes, Settings().scan_profile_whitelist).handle(meta) + assert octopoes.save_many_scan_profiles.call_count == 0 + + os.environ["BOEFJES_SCAN_PROFILE_WHITELIST"] = '{"kat_external_db_normalize": 2}' + NormalizerHandler(runner, bytes_mock, lambda x: octopoes, Settings().scan_profile_whitelist).handle(meta) + assert octopoes.save_many_scan_profiles.call_count == 1 + assert octopoes.save_many_scan_profiles.call_args[0][0][0].level == 2 + + os.environ["BOEFJES_SCAN_PROFILE_WHITELIST"] = '{"kat_external_db_normalize": 3}' + NormalizerHandler(runner, bytes_mock, lambda x: octopoes, Settings().scan_profile_whitelist).handle(meta) + assert octopoes.save_many_scan_profiles.call_count == 2 + assert octopoes.save_many_scan_profiles.call_args[0][0][0].level == 3 + + os.environ["BOEFJES_SCAN_PROFILE_WHITELIST"] = '{"kat_external_db_normalize": 4, "abc": 0}' + NormalizerHandler(runner, bytes_mock, lambda x: octopoes, Settings().scan_profile_whitelist).handle(meta) + assert octopoes.save_many_scan_profiles.call_count == 3 diff --git a/boefjes/tests/test_tasks.py b/boefjes/tests/test_tasks.py index e7ac3c33b31..4091e614ad5 100644 --- a/boefjes/tests/test_tasks.py +++ b/boefjes/tests/test_tasks.py @@ -121,7 +121,7 @@ def test_handle_boefje_with_exception(self, mock_find_ooi_in_past, mock_bytes_ap local_repository = LocalPluginRepository(Path(__file__).parent / "modules") with pytest.raises(RuntimeError): # Bytes still saves exceptions before they are reraised - BoefjeHandler(LocalBoefjeJobRunner(local_repository), local_repository).handle(meta) + BoefjeHandler(LocalBoefjeJobRunner(local_repository), local_repository, mock_bytes_api_client).handle(meta) mock_bytes_api_client.save_boefje_meta.assert_called_once_with(meta) mock_bytes_api_client.save_raw.assert_called_once() diff --git a/boefjes/tools/run_boefje.py b/boefjes/tools/run_boefje.py index a8aa0b77736..c8546a9bfcb 100755 --- a/boefjes/tools/run_boefje.py +++ b/boefjes/tools/run_boefje.py @@ -11,7 +11,7 @@ sys.path.append(str(Path(__file__).resolve().parent.parent)) -from boefjes.job_handler import BoefjeHandler +from boefjes.job_handler import BoefjeHandler, bytes_api_client from boefjes.job_models import Boefje, BoefjeMeta from boefjes.katalogus.local_repository import get_local_repository from boefjes.local import LocalBoefjeJobRunner @@ -31,7 +31,7 @@ def run_boefje(start_pdb, organization_code, boefje_id, input_ooi): local_repository = get_local_repository() - handler = BoefjeHandler(LocalBoefjeJobRunner(local_repository), local_repository) + handler = BoefjeHandler(LocalBoefjeJobRunner(local_repository), local_repository, bytes_api_client) try: handler.handle(meta) except Exception: diff --git a/boefjes/tools/run_normalizer.py b/boefjes/tools/run_normalizer.py index 4e7861a8f63..351b8bdf69f 100755 --- a/boefjes/tools/run_normalizer.py +++ b/boefjes/tools/run_normalizer.py @@ -9,6 +9,8 @@ import click +from boefjes.config import settings + sys.path.append(str(Path(__file__).resolve().parent.parent)) from boefjes.job_handler import NormalizerHandler, bytes_api_client @@ -33,7 +35,9 @@ def run_normalizer(start_pdb, normalizer_id, raw_id): local_repository = get_local_repository() - handler = NormalizerHandler(LocalNormalizerJobRunner(local_repository)) + handler = NormalizerHandler( + LocalNormalizerJobRunner(local_repository), bytes_api_client, settings.scan_profile_whitelist + ) try: handler.handle(meta) except Exception: