From 55a15458670eeb2679501aedd2820b0bce490717 Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Thu, 22 Jun 2023 17:17:10 -0400 Subject: [PATCH 01/24] init cleanup --- cartography/intel/aws/ecr.py | 10 ++++++++-- cartography/intel/aws/s3.py | 30 +++++++++++++++++++++++------- cartography/util.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 9 deletions(-) diff --git a/cartography/intel/aws/ecr.py b/cartography/intel/aws/ecr.py index 0b5dbb12e..dae4be6ae 100644 --- a/cartography/intel/aws/ecr.py +++ b/cartography/intel/aws/ecr.py @@ -1,4 +1,5 @@ import logging +from typing import Any from typing import Dict from typing import List @@ -9,6 +10,8 @@ from cartography.util import batch from cartography.util import run_cleanup_job from cartography.util import timeit +from cartography.util import to_async +from cartography.util import to_sync logger = logging.getLogger(__name__) @@ -148,9 +151,12 @@ def sync( logger.info("Syncing ECR for region '%s' in account '%s'.", region, current_aws_account_id) image_data = {} repositories = get_ecr_repositories(boto3_session, region) - for repo in repositories: - repo_image_obj = get_ecr_repository_images(boto3_session, region, repo['repositoryName']) + + async def async_get_images(repo: Dict[str, Any]) -> None: + repo_image_obj = await to_async(get_ecr_repository_images, boto3_session, region, repo['repositoryName']) image_data[repo['repositoryUri']] = repo_image_obj + to_sync(*[async_get_images(repo) for repo in repositories]) + load_ecr_repositories(neo4j_session, repositories, region, current_aws_account_id, update_tag) repo_images_list = transform_ecr_repository_images(image_data) load_ecr_repository_images(neo4j_session, repo_images_list, region, update_tag) diff --git a/cartography/intel/aws/s3.py b/cartography/intel/aws/s3.py index 6524bb4c4..d19da8032 100644 --- a/cartography/intel/aws/s3.py +++ b/cartography/intel/aws/s3.py @@ -1,3 +1,4 @@ +import 
asyncio import hashlib import json import logging @@ -20,6 +21,8 @@ from cartography.util import run_analysis_job from cartography.util import run_cleanup_job from cartography.util import timeit +from cartography.util import to_async +from cartography.util import to_sync logger = logging.getLogger(__name__) stat_handler = get_stats_client(__name__) @@ -55,7 +58,9 @@ def get_s3_bucket_details( # a local store for s3 clients so that we may re-use clients for an AWS region s3_regional_clients: Dict[Any, Any] = {} - for bucket in bucket_data['Buckets']: + BucketDetail = Tuple[str, Dict[str, Any], Dict[str, Any], Dict[str, Any], Dict[str, Any], Dict[str, Any]] + + async def _get_bucket_detail(bucket: Dict[str, Any]) -> BucketDetail: # Note: bucket['Region'] is sometimes None because # client.get_bucket_location() does not return a location constraint for buckets # in us-east-1 region @@ -63,12 +68,23 @@ def get_s3_bucket_details( if not client: client = boto3_session.client('s3', bucket['Region']) s3_regional_clients[bucket['Region']] = client - acl = get_acl(bucket, client) - policy = get_policy(bucket, client) - encryption = get_encryption(bucket, client) - versioning = get_versioning(bucket, client) - public_access_block = get_public_access_block(bucket, client) - yield bucket['Name'], acl, policy, encryption, versioning, public_access_block + ( + acl, + policy, + encryption, + versioning, + public_access_block, + ) = await asyncio.gather( + to_async(get_acl, bucket, client), + to_async(get_policy, bucket, client), + to_async(get_encryption, bucket, client), + to_async(get_versioning, bucket, client), + to_async(get_public_access_block, bucket, client), + ) + return bucket['Name'], acl, policy, encryption, versioning, public_access_block + + bucket_details = to_sync(*[_get_bucket_detail(bucket) for bucket in bucket_data['Buckets']]) + yield from bucket_details @timeit diff --git a/cartography/util.py b/cartography/util.py index 5d2615764..2baf1442e 100644 --- 
a/cartography/util.py +++ b/cartography/util.py @@ -1,9 +1,12 @@ +import asyncio import logging import re import sys +from functools import partial from functools import wraps from string import Template from typing import Any +from typing import Awaitable from typing import BinaryIO from typing import Callable from typing import cast @@ -25,6 +28,7 @@ from cartography.stats import get_stats_client from cartography.stats import ScopedStatsClient + if sys.version_info >= (3, 7): from importlib.resources import open_binary, read_text else: @@ -297,3 +301,27 @@ def batch(items: Iterable, size: int = DEFAULT_BATCH_SIZE) -> List[List]: items[i: i + size] for i in range(0, len(items), size) ] + + +def to_async(func: Callable, *args: Any, **kwargs: Any) -> asyncio.Future: + ''' + Returns a Future that will run a function in the default threadpool. + Helper until we start using pytohn 3.9's asyncio.to_thread + + example: + future = to_async(my_func, my_arg, my_arg2) + to_sync(future) + ''' + call = partial(func, *args, **kwargs) + return asyncio.get_event_loop().run_in_executor(None, call) + + +def to_sync(*awaitables: Awaitable[Any]) -> Any: + ''' + Waits for the Awaitable(s) to complete and returns their result(s). 
+ See https://docs.python.org/3.8/library/asyncio-task.html#asyncio-awaitables + + example: + result = to_sync(my_async_func(my_arg), another_async(my_arg2))) + ''' + return asyncio.get_event_loop().run_until_complete(asyncio.gather(*awaitables)) From 3aa19aff8e55157c1d3e529dca9df64eb3a10a03 Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Fri, 23 Jun 2023 19:12:17 -0400 Subject: [PATCH 02/24] note about nest_asyncio --- cartography/util.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cartography/util.py b/cartography/util.py index 2baf1442e..d5ab33464 100644 --- a/cartography/util.py +++ b/cartography/util.py @@ -311,6 +311,10 @@ def to_async(func: Callable, *args: Any, **kwargs: Any) -> asyncio.Future: example: future = to_async(my_func, my_arg, my_arg2) to_sync(future) + + NOTE: to use this in a Jupyter notebook, you need to do: + # import nest_asyncio + # nest_asyncio.apply() ''' call = partial(func, *args, **kwargs) return asyncio.get_event_loop().run_in_executor(None, call) From 04d7fe9a462439cf4914be74b00ee7e1dbe011f3 Mon Sep 17 00:00:00 2001 From: Jeremy Chapeau <113923302+resilience-jychp@users.noreply.github.com> Date: Fri, 23 Jun 2023 18:20:33 +0200 Subject: [PATCH 03/24] Hotfix (#1190) This PR fixes two bugs in the current version: - #1189 - #1182 --- cartography/cli.py | 3 +++ cartography/intel/github/repos.py | 5 ++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cartography/cli.py b/cartography/cli.py index 4ae159e40..db4041f8f 100644 --- a/cartography/cli.py +++ b/cartography/cli.py @@ -665,6 +665,9 @@ def main(self, argv: str) -> int: ) config.duo_api_key = os.environ.get(config.duo_api_key_env_var) config.duo_api_secret = os.environ.get(config.duo_api_secret_env_var) + else: + config.duo_api_key = None + config.duo_api_secret = None # Run cartography try: diff --git a/cartography/intel/github/repos.py b/cartography/intel/github/repos.py index 8fcadbd7f..238537e03 100644 
--- a/cartography/intel/github/repos.py +++ b/cartography/intel/github/repos.py @@ -312,14 +312,13 @@ def _transform_python_requirements( continue try: req = Requirement(stripped_line) + parsed_list.append(req) except InvalidRequirement: # INFO and not WARN/ERROR as we intentionally don't support all ways to specify Python requirements logger.info( f"Failed to parse line \"{line}\" in repo {repo_url}'s requirements.txt; skipping line.", exc_info=True, ) - continue - parsed_list.append(req) for req in parsed_list: pinned_version = None @@ -563,5 +562,5 @@ def sync( logger.info("Syncing GitHub repos") repos_json = get(github_api_key, github_url, organization) repo_data = transform(repos_json) - load(neo4j_session, repo_data, common_job_parameters['UPDATE_TAG']) + load(neo4j_session, common_job_parameters, repo_data) run_cleanup_job('github_repos_cleanup.json', neo4j_session, common_job_parameters) From 17f12889d461d60e70c36cee762acc265826f964 Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Fri, 23 Jun 2023 12:40:17 -0400 Subject: [PATCH 04/24] 0.81.0 (#1191) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 774cbae0b..07a7f9cee 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages from setuptools import setup -__version__ = '0.80.0' +__version__ = '0.81.0' setup( From 362cfd3c0fd59fff1c3c709588db24db6d7673d0 Mon Sep 17 00:00:00 2001 From: Hector Eryx Paredes Camacho Date: Mon, 10 Jul 2023 15:56:22 -0600 Subject: [PATCH 05/24] Set to None the gsuite token instead of github one (#1204) --- cartography/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cartography/cli.py b/cartography/cli.py index db4041f8f..cd01e0240 100644 --- a/cartography/cli.py +++ b/cartography/cli.py @@ -638,7 +638,7 @@ def main(self, argv: str) -> int: logger.debug(f"Reading config string for GSuite from environment variable 
{config.gsuite_tokens_env_var}") config.gsuite_config = os.environ.get(config.gsuite_tokens_env_var) else: - config.github_config = None + config.gsuite_tokens_env_var = None # Lastpass config if config.lastpass_cid_env_var: From 94bfabc44d7832138b2cf7f847e63f448ea72712 Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Tue, 11 Jul 2023 17:04:01 -0400 Subject: [PATCH 06/24] 0.82.0.dev1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 07a7f9cee..5d86f6ae8 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages from setuptools import setup -__version__ = '0.81.0' +__version__ = '0.82.0.dev1' setup( From 38a7a405359003056c09b60d02568d42f2e25f1a Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Wed, 12 Jul 2023 11:33:29 -0400 Subject: [PATCH 07/24] '0.82.0.dev2' --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5d86f6ae8..77542dedb 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages from setuptools import setup -__version__ = '0.82.0.dev1' +__version__ = '0.82.0.dev2' setup( From 9feb4fd8b29950bd76c1f15eeb6bd2be8a42d7f0 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Thu, 13 Jul 2023 19:17:23 -0700 Subject: [PATCH 08/24] Update usage docs and roadmap links (#1196) Adds docs on - discovering what other nodes are connected to a given node type - discovering what node properties are present on a given node type Updates docs on - roadmap link - making syncmetadata docs more discoverable --- README.md | 13 +-- .../modules/_cartography-metadata/schema.md | 18 +++ docs/root/usage/schema.md | 1 + docs/root/usage/tutorial.md | 105 +++++++++++++++--- docs/schema/syncmetadata.md | 18 +-- 5 files changed, 113 insertions(+), 42 deletions(-) create mode 100644 
docs/root/modules/_cartography-metadata/schema.md diff --git a/README.md b/README.md index 62ae48bbd..cac7c3d26 100644 --- a/README.md +++ b/README.md @@ -39,16 +39,13 @@ Start [here](https://lyft.github.io/cartography/install.html). ## Usage Start with our [tutorial](https://lyft.github.io/cartography/usage/tutorial.html). Our [data schema](https://lyft.github.io/cartography/usage/schema.html) is a helpful reference when you get stuck. -## Contact +## Community - Join us on `#cartography` on the [Lyft OSS Slack](https://join.slack.com/t/lyftoss/shared_invite/enQtOTYzODg5OTQwNDE2LTFiYjgwZWM3NTNhMTFkZjc4Y2IxOTI4NTdiNTdhNjQ4M2Q5NTIzMjVjOWI4NmVlNjRiZmU2YzA5NTc3MmFjYTQ). - -## Community Meeting - -Talk to us and see what we're working on at our [monthly community meeting](https://calendar.google.com/calendar/embed?src=lyft.com_p10o6ceuiieq9sqcn1ef61v1io%40group.calendar.google.com&ctz=America%2FLos_Angeles). -- Meeting minutes are [here](https://docs.google.com/document/d/1VyRKmB0dpX185I15BmNJZpfAJ_Ooobwz0U1WIhjDxvw). -- Recorded videos are posted [here](https://www.youtube.com/playlist?list=PLMga2YJvAGzidUWJB_fnG7EHI4wsDDsE1). -- Our current project road map is [here](https://docs.google.com/document/d/18MOsGI-isFvag1fGk718Aht7wQPueWd4SqOI9KapBa8/edit#heading=h.15nsmgmjaaml). +- Talk to us and see what we're working on at our [monthly community meeting](https://calendar.google.com/calendar/embed?src=lyft.com_p10o6ceuiieq9sqcn1ef61v1io%40group.calendar.google.com&ctz=America%2FLos_Angeles). + - Meeting minutes are [here](https://docs.google.com/document/d/1VyRKmB0dpX185I15BmNJZpfAJ_Ooobwz0U1WIhjDxvw). + - Recorded videos are posted [here](https://www.youtube.com/playlist?list=PLMga2YJvAGzidUWJB_fnG7EHI4wsDDsE1). +- Our current project roadmap is [here](https://github.com/orgs/lyft/projects/26/views/1). ## Contributing Thank you for considering contributing to Cartography! 
diff --git a/docs/root/modules/_cartography-metadata/schema.md b/docs/root/modules/_cartography-metadata/schema.md new file mode 100644 index 000000000..878d8268d --- /dev/null +++ b/docs/root/modules/_cartography-metadata/schema.md @@ -0,0 +1,18 @@ +## Cartography metadata schema + +.. _metadata_schema: + +Some Cartography sync jobs write nodes to convey information about the job itself. See https://github.com/lyft/cartography/issues/758 for more background on this. + +### SyncMetadata:ModuleSyncMetadata + +This is a node to represent metadata about the sync job of a particular module. Its existence indicates that a particular sync job did happen. +The 'types' used here should be actual node labels. For example, if we did sync a particular AWSAccount's S3Buckets, +the `grouptype` is 'AWSAccount', the `groupid` is the particular account's `id`, and the `syncedtype` is 'S3Bucket'. + +| Field | Description | Source| +|-------|-------------|------| +|**id**|`{group_type}_{group_id}_{synced_type}`|util.py| +|grouptype| The parent module's type |util.py| +|groupid|The parent module's id|util.py| +|syncedtype|The sub-module's type|util.py| diff --git a/docs/root/usage/schema.md b/docs/root/usage/schema.md index 3d6da845c..a5f1d101d 100644 --- a/docs/root/usage/schema.md +++ b/docs/root/usage/schema.md @@ -22,6 +22,7 @@ - In these docs, more specific nodes will be decorated with `GenericNode::SpecificNode` notation. For example, if we have a `Car` node and a `RaceCar` node, we will refer to the `RaceCar` as `Car::RaceCar`. +.. mdinclude:: ../modules/_cartography-metadata/schema.md .. mdinclude:: ../modules/aws/schema.md .. mdinclude:: ../modules/azure/schema.md .. 
mdinclude:: ../modules/crxcavator/schema.md diff --git a/docs/root/usage/tutorial.md b/docs/root/usage/tutorial.md index 357c8a7f2..f0cb52fa9 100644 --- a/docs/root/usage/tutorial.md +++ b/docs/root/usage/tutorial.md @@ -2,24 +2,16 @@ Once everything has been installed and synced, you can view the Neo4j web interface at http://localhost:7474. You can view the reference on this [here](https://neo4j.com/developer/guide-neo4j-browser/#_installing_and_starting_neo4j_browser). -### Permalinking Bookmarklet +If you already know Neo4j and just need to know what are the nodes, attributes, and graph relationships for our representation of infrastructure assets, you can view our [sample queries](samplequeries.html). More sample queries are available at https://github.com/marco-lancini/cartography-queries. -You can set up a bookmarklet that lets you quickly get a permalink to a Cartography query. To do so, add a bookmark with the following contents as the URL - make sure to replace `neo4j.contoso.com:7474` with your instance of Neo4j: +Otherwise, read on for this handhold-y tutorial filled with examples. Suppose we wanted to find out: -```javascript -javascript:(() => { const query = document.querySelectorAll('article label span')[0].innerText; if (query === ':server connect') { console.log('no query has been run!'); return; } const searchParams = new URLSearchParams(); searchParams.append('connectURL', 'bolt://neo4j:neo4j@neo4j.contoso.net:7687'); searchParams.append('cmd', 'edit'); searchParams.append('arg', query.replaceAll(/\r /g, '\r')); newURL = `http://neo4j.contoso.net:7474/browser/?${searchParams}`; window.open(newURL, '_blank', 'noopener'); })() -``` - -Then, any time you are in the web interface, you can click the bookmarklet to open a new tab with a permalink to your most recently executed query in the URL bar. - -### ℹ️ Already know [how to query Neo4j](https://neo4j.com/developer/cypher-query-language/)? You can skip to our reference material! 
-If you already know Neo4j and just need to know what are the nodes, attributes, and graph relationships for our representation of infrastructure assets, you can skip this handholdy walkthrough and see our [sample queries](samplequeries.md). - -### What [RDS](https://aws.amazon.com/rds/) instances are installed in my [AWS](https://aws.amazon.com/) accounts? -``` +### What [RDS](https://aws.amazon.com/rds/) instances are installed in my AWS accounts? +```cypher MATCH (aws:AWSAccount)-[r:RESOURCE]->(rds:RDSInstance) return * ``` + ![Visualization of RDS nodes and AWS nodes](../images/accountsandrds.png) In this query we asked Neo4j to find all `[:RESOURCE]` relationships from AWSAccounts to RDSInstances, and return the nodes and the `:RESOURCE` relationships. @@ -35,7 +27,7 @@ and then pick options on the menu that shows up at the bottom of the view like t ### Which RDS instances have [encryption](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Overview.Encryption.html) turned off? -``` +```cypher MATCH (a:AWSAccount)-[:RESOURCE]->(rds:RDSInstance{storage_encrypted:false}) RETURN a.name, rds.id ``` @@ -49,7 +41,7 @@ If you want to go back to viewing the graph and not a table, simply make sure yo Let's look at some other AWS assets now. ### Which [EC2](https://aws.amazon.com/ec2/) instances are directly exposed to the internet? -``` +```cypher MATCH (instance:EC2Instance{exposed_internet: true}) RETURN instance.instanceid, instance.publicdnsname ``` @@ -60,7 +52,7 @@ These instances are open to the internet either through permissive inbound IP pe If you know a lot about AWS, you may have noticed that EC2 instances [don't actually have an exposed_internet field](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_Instance.html). We're able to query for this because Cartography performs some [data enrichment](#data-enrichment) to add this field to EC2Instance nodes. 
### Which [S3](https://aws.amazon.com/s3/) buckets have a policy granting any level of anonymous access to the bucket? -``` +```cypher MATCH (s:S3Bucket) WHERE s.anonymous_access = true RETURN s @@ -76,13 +68,81 @@ A couple of other things to notice: instead of using the "{}" notation to filter Let's go back to analyzing RDS instances. In an earlier example we queried for RDS instances that have encryption turned off. We can aggregate this data by AWSAccount with a small change: -``` +```cypher MATCH (a:AWSAccount)-[:RESOURCE]->(rds:RDSInstance) WHERE rds.storage_encrypted = false RETURN a.name as AWSAccount, count(rds) as UnencryptedInstances ``` ![Table of unencrypted RDS instances by AWS account](../images/unencryptedcounts.png) + +### Given a node label, what other node labels can be connected to it? + +Suppose we wanted to know what other assets can be connected to a DNSRecord. We would ask the graph like this: + +```cypher +match (d:DNSRecord)--(n) +return distinct labels(n); +``` + +This says "what are the possible labels for all nodes connected to all DNSRecord nodes `d` in my graph?" Your answer might look like this: + +``` +["AWSDNSRecord", "DNSRecord"] +["AWSDNSZone", "DNSZone"] +["LoadBalancerV2"] +["NameServer"] +["ESDomain"] +["LoadBalancer"] +["EC2Instance", "Instance"] +``` + +You can then make the path more specific like this: + +```cypher +match (d:DNSRecord)--(:EC2Instance)--(n) +return distinct labels(n); +``` + +And then you can continue building your query. + +We also include [full schema docs](schema.html), but this way of building a query can be faster and more interactive. + + +### Given a node label, what are the possible property names defined on it? 
+ +We can find what properties are available on an S3Bucket like this: + +```cypher +match (n:S3Bucket) return properties(n) limit 1; +``` + +The result will look like this: + +``` +{ + "bucket_key_enabled": false, + "creationdate": "2022-05-10 00:22:52+00:00", + "ignore_public_acls": true, + "anonymous_access": false, + "firstseen": 1652400141863, + "block_public_policy": true, + "versioning_status": "Enabled", + "block_public_acls": true, + "anonymous_actions": [], + "name": "my-fake-bucket-123", + "lastupdated": 1688605272, + "encryption_algorithm": "AES256", + "default_encryption": true, + "id": "my-fake-bucket-123", + "arn": "arn:aws:s3:::my-fake-bucket-123", + "restrict_public_buckets": false +} +``` + +Our [full schema docs](schema.html) describe all possible fields, but listing out properties this way lets you avoid switching between browser tabs. + + ### Learning more If you want to learn more in depth about Neo4j and Cypher queries you can look at [this tutorial](https://neo4j.com/developer/cypher-query-language/) and see this [reference card](https://neo4j.com/docs/cypher-refcard/current/). @@ -117,3 +177,14 @@ You can add your own custom attributes and relationships without writing Python ### Mapping AWS Access Permissions Cartography can map permissions between IAM Principals and resources in the graph. Here's [how](../modules/aws/permissions-mapping.html). + + +### Permalinking Bookmarklet + +You can set up a bookmarklet that lets you quickly get a permalink to a Cartography query. 
To do so, add a bookmark with the following contents as the URL - make sure to replace `neo4j.contoso.com:7474` with your instance of Neo4j: + +```javascript +javascript:(() => { const query = document.querySelectorAll('article label span')[0].innerText; if (query === ':server connect') { console.log('no query has been run!'); return; } const searchParams = new URLSearchParams(); searchParams.append('connectURL', 'bolt://neo4j:neo4j@neo4j.contoso.net:7687'); searchParams.append('cmd', 'edit'); searchParams.append('arg', query.replaceAll(/\r /g, '\r')); newURL = `http://neo4j.contoso.net:7474/browser/?${searchParams}`; window.open(newURL, '_blank', 'noopener'); })() +``` + +Then, any time you are in the web interface, you can click the bookmarklet to open a new tab with a permalink to your most recently executed query in the URL bar. diff --git a/docs/schema/syncmetadata.md b/docs/schema/syncmetadata.md index baad4be28..7572a7ff2 100644 --- a/docs/schema/syncmetadata.md +++ b/docs/schema/syncmetadata.md @@ -1,17 +1 @@ -## SyncMetadata - -SyncMetadata nodes are created by sync jobs to convey information about the job itself. See this doc for how this is -used. - -## SyncMetadata:ModuleSyncMetadata - -This is a node to represent some metadata about the sync job of a particular module or sub-module. Its existence should suggest that a paritcular sync job did happen. -The 'types' used here should be actual node labels. For example, if we did sync a particular AWSAccount's S3Buckets, -the `grouptype` is 'AWSAccount', the `groupid` is the particular account's `id`, and the `syncedtype` is 'S3Bucket'. 
- -| Field | Description | Source| -|-------|-------------|------| -|**id**|`{group_type}_{group_id}_{synced_type}`|util.py| -|grouptype| The parent module's type |util.py| -|groupid|The parent module's id|util.py| -|syncedtype|The sub-module's type|util.py| +This document has been moved [here](https://lyft.github.io/cartography/modules/_cartography-metadata/schema.html) From 23185a66062c1003270e522b4924dc0c9b36f757 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Fri, 14 Jul 2023 13:16:49 -0700 Subject: [PATCH 09/24] #1210: EBSVolume => new data model, Allow node attr updates from multiple intel modules (#1214) See #1210 for full context. #1154 tried to solve this problem by updating the querybuilder but this was too complex and would not generalize well. This solution is simpler where we use different property classes for each API response so that we don't overwrite properties on a node set by another sync job. This PR can be reviewed commit-by-commit: - c0d9ac4cc35fd93e260964cfa41a813e2a2032a7 shows a repro of the error with a failing integration test. - facb63bcbac6b68eec0fd2ea3f6b0550ac40eb10 shows the solution using multiple classes. 
--------- Co-authored-by: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> --- cartography/intel/aws/ec2/instances.py | 4 +- cartography/intel/aws/ec2/volumes.py | 118 +++++------ cartography/intel/aws/util/arns.py | 18 ++ cartography/models/aws/ec2/volumes.py | 46 ++++- tests/data/aws/ec2/volumes.py | 4 +- .../intel/aws/ec2/test_ec2_volumes.py | 194 +++++++++++------- 6 files changed, 237 insertions(+), 147 deletions(-) create mode 100644 cartography/intel/aws/util/arns.py diff --git a/cartography/intel/aws/ec2/instances.py b/cartography/intel/aws/ec2/instances.py index 1c69eb06a..87c7e3202 100644 --- a/cartography/intel/aws/ec2/instances.py +++ b/cartography/intel/aws/ec2/instances.py @@ -17,7 +17,7 @@ from cartography.models.aws.ec2.reservations import EC2ReservationSchema from cartography.models.aws.ec2.securitygroups import EC2SecurityGroupSchema from cartography.models.aws.ec2.subnets import EC2SubnetSchema -from cartography.models.aws.ec2.volumes import EBSVolumeSchema +from cartography.models.aws.ec2.volumes import EBSVolumeInstanceSchema from cartography.util import aws_handle_regions from cartography.util import timeit @@ -273,7 +273,7 @@ def load_ec2_instance_ebs_volumes( ) -> None: load( neo4j_session, - EBSVolumeSchema(), + EBSVolumeInstanceSchema(), ebs_data, Region=region, AWS_ID=current_aws_account_id, diff --git a/cartography/intel/aws/ec2/volumes.py b/cartography/intel/aws/ec2/volumes.py index 6de03c7d4..3ad50f118 100644 --- a/cartography/intel/aws/ec2/volumes.py +++ b/cartography/intel/aws/ec2/volumes.py @@ -6,7 +6,9 @@ import boto3 import neo4j +from cartography.client.core.tx import load from cartography.graph.job import GraphJob +from cartography.intel.aws.util.arns import build_arn from cartography.models.aws.ec2.volumes import EBSVolumeSchema from cartography.util import aws_handle_regions from cartography.util import timeit @@ -16,7 +18,7 @@ @timeit @aws_handle_regions -def get_volumes(boto3_session: boto3.session.Session, 
region: str) -> List[Dict]: +def get_volumes(boto3_session: boto3.session.Session, region: str) -> List[Dict[str, Any]]: client = boto3_session.client('ec2', region_name=region) paginator = client.get_paginator('describe_volumes') volumes: List[Dict] = [] @@ -26,90 +28,76 @@ def get_volumes(boto3_session: boto3.session.Session, region: str) -> List[Dict] def transform_volumes(volumes: List[Dict[str, Any]], region: str, current_aws_account_id: str) -> List[Dict[str, Any]]: + result = [] for volume in volumes: - volume['VolumeArn'] = f"arn:aws:ec2:{region}:{current_aws_account_id}:volume/{volume['VolumeId']}" - volume['CreateTime'] = str(volume['CreateTime']) - return volumes + attachments = volume.get('Attachments', []) + active_attachments = [a for a in attachments if a['State'] == 'attached'] + + volume_id = volume['VolumeId'] + raw_vol = ({ + 'Arn': build_arn('ec2', current_aws_account_id, 'volume', volume_id, region), + 'AvailabilityZone': volume['AvailabilityZone'], + 'CreateTime': volume['CreateTime'], + 'Encrypted': volume['Encrypted'], + 'Size': volume['Size'], + 'State': volume['State'], + 'OutpostArn': volume['OutpostArn'], + 'SnapshotId': volume['SnapshotId'], + 'Iops': volume['Iops'], + 'FastRestored': volume['FastRestored'], + 'MultiAttachEnabled': volume['MultiAttachEnabled'], + 'VolumeType': volume['VolumeType'], + 'VolumeId': volume_id, + 'KmsKeyId': volume['KmsKeyId'], + }) + + if not active_attachments: + result.append(raw_vol) + continue + + for attachment in active_attachments: + vol_with_attachment = raw_vol.copy() + vol_with_attachment['InstanceId'] = attachment['InstanceId'] + result.append(vol_with_attachment) + + return result @timeit def load_volumes( - neo4j_session: neo4j.Session, data: List[Dict], region: str, current_aws_account_id: str, update_tag: int, + neo4j_session: neo4j.Session, + ebs_data: List[Dict[str, Any]], + region: str, + current_aws_account_id: str, + update_tag: int, ) -> None: - ingest_volumes = """ - UNWIND 
$volumes_list as volume - MERGE (vol:EBSVolume{id: volume.VolumeId}) - ON CREATE SET vol.firstseen = timestamp() - SET vol.arn = volume.VolumeArn, - vol.lastupdated = $update_tag, - vol.availabilityzone = volume.AvailabilityZone, - vol.createtime = volume.CreateTime, - vol.encrypted = volume.Encrypted, - vol.size = volume.Size, - vol.state = volume.State, - vol.outpostarn = volume.OutpostArn, - vol.snapshotid = volume.SnapshotId, - vol.iops = volume.Iops, - vol.fastrestored = volume.FastRestored, - vol.multiattachenabled = volume.MultiAttachEnabled, - vol.type = volume.VolumeType, - vol.kmskeyid = volume.KmsKeyId, - vol.region=$Region - WITH vol - MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID}) - MERGE (aa)-[r:RESOURCE]->(vol) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $update_tag - """ - - neo4j_session.run( - ingest_volumes, - volumes_list=data, - AWS_ACCOUNT_ID=current_aws_account_id, + load( + neo4j_session, + EBSVolumeSchema(), + ebs_data, Region=region, - update_tag=update_tag, + AWS_ID=current_aws_account_id, + lastupdated=update_tag, ) -def load_volume_relationships( - neo4j_session: neo4j.Session, - volumes: List[Dict[str, Any]], - aws_update_tag: int, -) -> None: - add_relationship_query = """ - MATCH (volume:EBSVolume{arn: $VolumeArn}) - WITH volume - MATCH (instance:EC2Instance{instanceid: $InstanceId}) - MERGE (volume)-[r:ATTACHED_TO_EC2_INSTANCE]->(instance) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $aws_update_tag - """ - for volume in volumes: - for attachment in volume.get('Attachments', []): - if attachment['State'] != 'attached': - continue - neo4j_session.run( - add_relationship_query, - VolumeArn=volume['VolumeArn'], - InstanceId=attachment['InstanceId'], - aws_update_tag=aws_update_tag, - ) - - @timeit -def cleanup_volumes(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None: +def cleanup_volumes(neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]) -> None: 
GraphJob.from_node_schema(EBSVolumeSchema(), common_job_parameters).run(neo4j_session) @timeit def sync_ebs_volumes( - neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: List[str], - current_aws_account_id: str, update_tag: int, common_job_parameters: Dict, + neo4j_session: neo4j.Session, + boto3_session: boto3.session.Session, + regions: List[str], + current_aws_account_id: str, + update_tag: int, + common_job_parameters: Dict[str, Any], ) -> None: for region in regions: logger.debug("Syncing volumes for region '%s' in account '%s'.", region, current_aws_account_id) data = get_volumes(boto3_session, region) transformed_data = transform_volumes(data, region, current_aws_account_id) load_volumes(neo4j_session, transformed_data, region, current_aws_account_id, update_tag) - load_volume_relationships(neo4j_session, transformed_data, update_tag) cleanup_volumes(neo4j_session, common_job_parameters) diff --git a/cartography/intel/aws/util/arns.py b/cartography/intel/aws/util/arns.py new file mode 100644 index 000000000..e6108b82c --- /dev/null +++ b/cartography/intel/aws/util/arns.py @@ -0,0 +1,18 @@ +from typing import Optional + + +def build_arn( + resource: str, + account: str, + typename: str, + name: str, + region: Optional[str] = None, + partition: Optional[str] = None, +) -> str: + if not partition: + # TODO: support partitions from others. Please file an issue on this if needed, would love to hear from you + partition = 'aws' + if not region: + # Some resources are present in all regions, e.g. 
IAM policies + region = "" + return f"arn:{partition}:{resource}:{region}:{account}:{typename}/{name}" diff --git a/cartography/models/aws/ec2/volumes.py b/cartography/models/aws/ec2/volumes.py index 2140f4fcd..bb6925780 100644 --- a/cartography/models/aws/ec2/volumes.py +++ b/cartography/models/aws/ec2/volumes.py @@ -13,10 +13,23 @@ @dataclass(frozen=True) class EBSVolumeNodeProperties(CartographyNodeProperties): + arn: PropertyRef = PropertyRef('Arn', extra_index=True) id: PropertyRef = PropertyRef('VolumeId') + volumeid: PropertyRef = PropertyRef('VolumeId', extra_index=True) region: PropertyRef = PropertyRef('Region', set_in_kwargs=True) lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) - deleteontermination: PropertyRef = PropertyRef('DeleteOnTermination') + availabilityzone: PropertyRef = PropertyRef('AvailabilityZone') + createtime: PropertyRef = PropertyRef('CreateTime') + encrypted: PropertyRef = PropertyRef('Encrypted') + size: PropertyRef = PropertyRef('Size') + state: PropertyRef = PropertyRef('State') + outpostarn: PropertyRef = PropertyRef('OutpostArn') + snapshotid: PropertyRef = PropertyRef('SnapshotId') + iops: PropertyRef = PropertyRef('Iops') + fastrestored: PropertyRef = PropertyRef('FastRestored') + multiattachenabled: PropertyRef = PropertyRef('MultiAttachEnabled') + type: PropertyRef = PropertyRef('VolumeType') + kmskeyid: PropertyRef = PropertyRef('KmsKeyId') @dataclass(frozen=True) @@ -53,6 +66,9 @@ class EBSVolumeToEC2Instance(CartographyRelSchema): @dataclass(frozen=True) class EBSVolumeSchema(CartographyNodeSchema): + """ + EBS Volume properties as returned from the EBS Volume API response + """ label: str = 'EBSVolume' properties: EBSVolumeNodeProperties = EBSVolumeNodeProperties() sub_resource_relationship: EBSVolumeToAWSAccount = EBSVolumeToAWSAccount() @@ -61,3 +77,31 @@ class EBSVolumeSchema(CartographyNodeSchema): EBSVolumeToEC2Instance(), ], ) + + +@dataclass(frozen=True) +class 
EBSVolumeInstanceProperties(CartographyNodeProperties): + """ + EBS Volume properties as known by an EC2 instance. + The EC2 instance API response includes a `deleteontermination` field and the volume id. + """ + arn: PropertyRef = PropertyRef('Arn', extra_index=True) + id: PropertyRef = PropertyRef('VolumeId') + volumeid: PropertyRef = PropertyRef('VolumeId', extra_index=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + deleteontermination: PropertyRef = PropertyRef('DeleteOnTermination') + + +@dataclass(frozen=True) +class EBSVolumeInstanceSchema(CartographyNodeSchema): + """ + EBS Volume from EC2 Instance API response. This is separate from `EBSVolumeSchema` to prevent issue #1210. + """ + label: str = 'EBSVolume' + properties: EBSVolumeInstanceProperties = EBSVolumeInstanceProperties() + sub_resource_relationship: EBSVolumeToAWSAccount = EBSVolumeToAWSAccount() + other_relationships: OtherRelationships = OtherRelationships( + [ + EBSVolumeToEC2Instance(), + ], + ) diff --git a/tests/data/aws/ec2/volumes.py b/tests/data/aws/ec2/volumes.py index e3a29c03f..6e08003e7 100644 --- a/tests/data/aws/ec2/volumes.py +++ b/tests/data/aws/ec2/volumes.py @@ -14,7 +14,7 @@ 'Size': 123, 'SnapshotId': 'sn-01', 'State': 'available', - 'VolumeId': 'v-01', + 'VolumeId': 'vol-0df', 'Iops': 123, 'VolumeType': 'standard', 'FastRestored': True, @@ -33,7 +33,7 @@ 'OutpostArn': 'arn1', 'Size': 123, 'State': 'available', - 'VolumeId': 'v-02', + 'VolumeId': 'vol-03', 'Iops': 123, 'SnapshotId': 'sn-02', 'VolumeType': 'standard', diff --git a/tests/integration/cartography/intel/aws/ec2/test_ec2_volumes.py b/tests/integration/cartography/intel/aws/ec2/test_ec2_volumes.py index 705d2db3e..b85258130 100644 --- a/tests/integration/cartography/intel/aws/ec2/test_ec2_volumes.py +++ b/tests/integration/cartography/intel/aws/ec2/test_ec2_volumes.py @@ -3,94 +3,116 @@ import cartography.intel.aws.ec2.instances import cartography.intel.aws.ec2.volumes -import 
tests.data.aws.ec2.instances -import tests.data.aws.ec2.volumes from cartography.intel.aws.ec2.instances import sync_ec2_instances +from cartography.intel.aws.ec2.volumes import sync_ebs_volumes from tests.data.aws.ec2.instances import DESCRIBE_INSTANCES - +from tests.data.aws.ec2.volumes import DESCRIBE_VOLUMES +from tests.integration.cartography.intel.aws.common import create_test_account +from tests.integration.util import check_nodes +from tests.integration.util import check_rels TEST_ACCOUNT_ID = '000000000000' TEST_REGION = 'eu-west-1' TEST_UPDATE_TAG = 123456789 -def test_load_volumes(neo4j_session): +@patch.object(cartography.intel.aws.ec2.volumes, 'get_volumes', return_value=DESCRIBE_VOLUMES) +def test_sync_ebs_volumes(mock_get_vols, neo4j_session): # Arrange - data = tests.data.aws.ec2.volumes.DESCRIBE_VOLUMES - transformed_data = cartography.intel.aws.ec2.volumes.transform_volumes(data, TEST_REGION, TEST_ACCOUNT_ID) + boto3_session = MagicMock() + create_test_account(neo4j_session, TEST_ACCOUNT_ID, TEST_UPDATE_TAG) # Act - cartography.intel.aws.ec2.volumes.load_volumes( + sync_ebs_volumes( neo4j_session, - transformed_data, - TEST_REGION, + boto3_session, + [TEST_REGION], TEST_ACCOUNT_ID, TEST_UPDATE_TAG, + {'UPDATE_TAG': TEST_UPDATE_TAG, 'AWS_ID': TEST_ACCOUNT_ID}, ) # Assert - expected_nodes = { - "v-01", "v-02", + assert check_nodes(neo4j_session, 'EBSVolume', ['arn']) == { + ('arn:aws:ec2:eu-west-1:000000000000:volume/vol-03',), + ('arn:aws:ec2:eu-west-1:000000000000:volume/vol-0df',), } - nodes = neo4j_session.run( - """ - MATCH (r:EBSVolume) RETURN r.id; - """, - ) - actual_nodes = {n['r.id'] for n in nodes} - - assert actual_nodes == expected_nodes - - -def test_load_volume_to_account_rels(neo4j_session): + # Assert + assert check_rels( + neo4j_session, + 'AWSAccount', + 'id', + 'EBSVolume', + 'volumeid', + 'RESOURCE', + rel_direction_right=True, + ) == { + (TEST_ACCOUNT_ID, 'vol-03'), + (TEST_ACCOUNT_ID, 'vol-0df'), + } - # Arrange: Create Test 
AWSAccount - neo4j_session.run( - """ - MERGE (aws:AWSAccount{id: $aws_account_id}) - ON CREATE SET aws.firstseen = timestamp() - SET aws.lastupdated = $aws_update_tag - """, - aws_account_id=TEST_ACCOUNT_ID, - aws_update_tag=TEST_UPDATE_TAG, - ) - # Act: Load Test Volumes - data = tests.data.aws.ec2.volumes.DESCRIBE_VOLUMES - transformed_data = cartography.intel.aws.ec2.volumes.transform_volumes(data, TEST_REGION, TEST_ACCOUNT_ID) +@patch.object(cartography.intel.aws.ec2.instances, 'get_ec2_instances', return_value=DESCRIBE_INSTANCES['Reservations']) +@patch.object(cartography.intel.aws.ec2.volumes, 'get_volumes', return_value=DESCRIBE_VOLUMES) +def test_sync_ebs_volumes_e2e(mock_get_vols, mock_get_instances, neo4j_session): + # Arrange + neo4j_session.run('MATCH (n) DETACH DELETE n;') + boto3_session = MagicMock() + create_test_account(neo4j_session, TEST_ACCOUNT_ID, TEST_UPDATE_TAG) - cartography.intel.aws.ec2.volumes.load_volumes( + # Act: sync_ec2_instances() loads attached ebs volumes + sync_ec2_instances( neo4j_session, - transformed_data, - TEST_REGION, + boto3_session, + [TEST_REGION], TEST_ACCOUNT_ID, TEST_UPDATE_TAG, + {'UPDATE_TAG': TEST_UPDATE_TAG, 'AWS_ID': TEST_ACCOUNT_ID}, ) - # Assert - expected = { - (TEST_ACCOUNT_ID, 'v-01'), - (TEST_ACCOUNT_ID, 'v-02'), + # Assert that deleteontermination is set by sync_ec2_instances. The encrypted property isn't returned by this API. 
+ assert check_nodes(neo4j_session, 'EBSVolume', ['id', 'deleteontermination', 'encrypted']) == { + ('vol-03', True, None), + ('vol-04', True, None), + ('vol-09', True, None), + ('vol-0df', True, None), } - result = neo4j_session.run( - """ - MATCH (n1:AWSAccount)-[:RESOURCE]->(n2:EBSVolume) RETURN n1.id, n2.id; - """, - ) - actual = { - (r['n1.id'], r['n2.id']) for r in result + # Assert that they are attached to the instances + assert check_rels( + neo4j_session, + 'EC2Instance', + 'instanceid', + 'EBSVolume', + 'volumeid', + 'ATTACHED_TO', + rel_direction_right=False, + ) == { + ('i-01', 'vol-0df'), + ('i-02', 'vol-03'), + ('i-03', 'vol-09'), + ('i-04', 'vol-04'), } - assert actual == expected - + # Assert that we created the account to volume rels correctly + assert check_rels( + neo4j_session, + 'AWSAccount', + 'id', + 'EBSVolume', + 'volumeid', + 'RESOURCE', + rel_direction_right=True, + ) == { + ('000000000000', 'vol-03'), + ('000000000000', 'vol-04'), + ('000000000000', 'vol-09'), + ('000000000000', 'vol-0df'), + } -@patch.object(cartography.intel.aws.ec2.instances, 'get_ec2_instances', return_value=DESCRIBE_INSTANCES['Reservations']) -def test_load_volume_to_instance_rels(mock_get_instances, neo4j_session): - # Arrange: Load in ec2 instances first - boto3_session = MagicMock() - sync_ec2_instances( + # Act + sync_ebs_volumes( neo4j_session, boto3_session, [TEST_REGION], @@ -98,28 +120,46 @@ def test_load_volume_to_instance_rels(mock_get_instances, neo4j_session): TEST_UPDATE_TAG, {'UPDATE_TAG': TEST_UPDATE_TAG, 'AWS_ID': TEST_ACCOUNT_ID}, ) - # Prep the volume data - raw_volumes = tests.data.aws.ec2.volumes.DESCRIBE_VOLUMES - transformed_volumes = cartography.intel.aws.ec2.volumes.transform_volumes(raw_volumes, TEST_REGION, TEST_ACCOUNT_ID) - # Act - cartography.intel.aws.ec2.volumes.load_volume_relationships( - neo4j_session, - transformed_volumes, - TEST_UPDATE_TAG, - ) + # Assert that additional fields such as `encrypted` have been added by 
sync_ebs_volumes(), while + # deleteontermination has not been overwritten with None by sync_ebs_volumes() + assert check_nodes(neo4j_session, 'EBSVolume', ['id', 'deleteontermination', 'encrypted']) == { + # Attached to the instances initially + ('vol-04', True, None), + ('vol-09', True, None), + # Added by ebs sync + ('vol-03', True, True), + ('vol-0df', True, True), + } - # Assert - result = neo4j_session.run( - """ - MATCH (n1:EC2Instance)<-[:ATTACHED_TO_EC2_INSTANCE]-(n2:EBSVolume) RETURN n1.id, n2.id; - """, - ) - expected = { - ('i-01', 'v-01'), - ('i-02', 'v-02'), + # Assert that they are still attached to the instances + assert check_rels( + neo4j_session, + 'EC2Instance', + 'instanceid', + 'EBSVolume', + 'volumeid', + 'ATTACHED_TO', + rel_direction_right=False, + ) == { + ('i-01', 'vol-0df'), + ('i-02', 'vol-03'), + ('i-03', 'vol-09'), + ('i-04', 'vol-04'), } - actual = { - (r['n1.id'], r['n2.id']) for r in result + + # Assert that the account to volume rels still exist + assert check_rels( + neo4j_session, + 'AWSAccount', + 'id', + 'EBSVolume', + 'volumeid', + 'RESOURCE', + rel_direction_right=True, + ) == { + ('000000000000', 'vol-03'), + ('000000000000', 'vol-04'), + ('000000000000', 'vol-09'), + ('000000000000', 'vol-0df'), } - assert actual == expected From e245b101cc7438018370a7b49ba8fa02a6f96f4a Mon Sep 17 00:00:00 2001 From: Dean Liu Date: Mon, 17 Jul 2023 11:08:29 -0700 Subject: [PATCH 10/24] Fix index out of range for drift detection returning no results (#1220) It's possible for neo4j sessions `read_transaction` in `get_state` to return an empty list in the drift detection module. This PR ensures that there are entries before referencing index 0. 
``` File "/code/venvs/venv/lib/python3.8/site-packages/cartography/driftdetect/get_states.py", line 123, in get_query_state get_state(session, state) File "/code/venvs/venv/lib/python3.8/site-packages/cartography/driftdetect/get_states.py", line 148, in get_state state.properties = list(new_results[0].keys()) IndexError: list index out of range ``` --- cartography/driftdetect/get_states.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cartography/driftdetect/get_states.py b/cartography/driftdetect/get_states.py index eaa40bbba..b7e057561 100644 --- a/cartography/driftdetect/get_states.py +++ b/cartography/driftdetect/get_states.py @@ -145,9 +145,9 @@ def get_state(session: neo4j.Session, state: State) -> None: logger.debug(f"Updating results for {state.name}") # The keys will be the same across all items in the returned list - state.properties = list(new_results[0].keys()) - results = [] + state.properties = list(new_results[0].keys()) if len(new_results) > 0 else [] + results = [] for record in new_results: values = [] for field in record.values(): From f889b5348231b2fdc6cd42891e5443956f8c49ea Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Mon, 24 Jul 2023 08:56:23 -0700 Subject: [PATCH 11/24] Add contributing guidelines for issues (#1226) Instruct how we plan to use the Discussions feature --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index cac7c3d26..11208559f 100644 --- a/README.md +++ b/README.md @@ -54,9 +54,12 @@ Thank you for considering contributing to Cartography! Legal stuff: This project is governed by [Lyft's code of conduct](https://github.com/lyft/code-of-conduct). All contributors and participants agree to abide by its terms. +### Bug reports and feature requests and discussions +Submit a GitHub issue to report a bug or request a new feature. 
If we decide that the issue needs more discussion - usually because the scope is too large or we need to make a careful decision - we will convert the issue to a [GitHub Discussion](https://github.com/lyft/cartography/discussions). + ### Developing Cartography -Get started with our [developer documentation](https://lyft.github.io/cartography/dev/developer-guide.html). +Get started with our [developer documentation](https://lyft.github.io/cartography/dev/developer-guide.html). Please feel free to submit your own PRs to update documentation if you've found a better way to explain something. #### Sign the Contributor License Agreement (CLA) From 40b075a9cd3dcfab952e3f4a36bd02785a2f621f Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Tue, 1 Aug 2023 14:42:31 -0400 Subject: [PATCH 12/24] add throttling and typing --- cartography/util.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/cartography/util.py b/cartography/util.py index d5ab33464..755223f57 100644 --- a/cartography/util.py +++ b/cartography/util.py @@ -145,7 +145,8 @@ def load_resource_binary(package: str, resource_name: str) -> BinaryIO: return open_binary(package, resource_name) -F = TypeVar('F', bound=Callable[..., Any]) +R = TypeVar('R') +F = TypeVar('F', bound=Callable[..., R]) def timeit(method: F) -> F: @@ -156,12 +157,12 @@ def timeit(method: F) -> F: """ # Allow access via `inspect` to the wrapped function. This is used in integration tests to standardize param names. 
@wraps(method) - def timed(*args, **kwargs): # type: ignore + def timed(*args, **kwargs) -> R: # type: ignore stats_client = get_stats_client(method.__module__) if stats_client.is_enabled(): timer = stats_client.timer(method.__name__) timer.start() - result = method(*args, **kwargs) + result: R = method(*args, **kwargs) timer.stop() return result else: @@ -303,11 +304,13 @@ def batch(items: Iterable, size: int = DEFAULT_BATCH_SIZE) -> List[List]: ] -def to_async(func: Callable, *args: Any, **kwargs: Any) -> asyncio.Future: +def to_async(func: F, *args: Any, **kwargs: Any) -> Awaitable[R]: ''' Returns a Future that will run a function in the default threadpool. Helper until we start using pytohn 3.9's asyncio.to_thread + Calls are also wrapped within a backoff decorator to handle throttling errors. + example: future = to_async(my_func, my_arg, my_arg2) to_sync(future) @@ -316,7 +319,21 @@ def to_async(func: Callable, *args: Any, **kwargs: Any) -> asyncio.Future: # import nest_asyncio # nest_asyncio.apply() ''' - call = partial(func, *args, **kwargs) + CartographyThrottlingException = type('CartographyThrottlingException', (Exception,), {}) + throttling_error_codes = ['LimitExceededException', 'Throttling'] + + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + try: + return func(*args, **kwargs) + except botocore.exceptions.ClientError as error: + if error.response['Error']['Code'] in throttling_error_codes: + raise CartographyThrottlingException from error + raise + + # don't use @backoff as decorator, to preserve typing + wrapped = backoff.on_exception(backoff.expo, CartographyThrottlingException)(wrapper) + call = partial(wrapped, *args, **kwargs) return asyncio.get_event_loop().run_in_executor(None, call) From da5c68d6349cd921b4556c74e1c0379d53da2ab3 Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Wed, 2 Aug 2023 15:18:12 -0400 Subject: [PATCH 13/24] add boto3_session fixture with stubbed 
clients --- cartography/intel/aws/ecr.py | 25 ++++++++++---- cartography/util.py | 10 +++--- tests/unit/conftest.py | 66 ++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 11 deletions(-) create mode 100644 tests/unit/conftest.py diff --git a/cartography/intel/aws/ecr.py b/cartography/intel/aws/ecr.py index dae4be6ae..9aa9ab8b2 100644 --- a/cartography/intel/aws/ecr.py +++ b/cartography/intel/aws/ecr.py @@ -141,6 +141,24 @@ def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None: logger.debug("Running ECR cleanup job.") run_cleanup_job('aws_import_ecr_cleanup.json', neo4j_session, common_job_parameters) +# get the image_data given a list of repositores + + +def _get_image_data(boto3_session: boto3.session.Session, region: str, repositories: List[Dict]) -> Dict: + ''' + Given a list of repositories, get the image data for each repository, + return as a mapping from repositoryUri to image object + ''' + image_data = {} + repositories = get_ecr_repositories(boto3_session, region) + + async def async_get_images(repo: Dict[str, Any]) -> None: + repo_image_obj = await to_async(get_ecr_repository_images, boto3_session, region, repo['repositoryName']) + image_data[repo['repositoryUri']] = repo_image_obj + to_sync(*[async_get_images(repo) for repo in repositories]) + + return image_data + @timeit def sync( @@ -151,12 +169,7 @@ def sync( logger.info("Syncing ECR for region '%s' in account '%s'.", region, current_aws_account_id) image_data = {} repositories = get_ecr_repositories(boto3_session, region) - - async def async_get_images(repo: Dict[str, Any]) -> None: - repo_image_obj = await to_async(get_ecr_repository_images, boto3_session, region, repo['repositoryName']) - image_data[repo['repositoryUri']] = repo_image_obj - to_sync(*[async_get_images(repo) for repo in repositories]) - + image_data = _get_image_data(boto3_session, region, repositories) load_ecr_repositories(neo4j_session, repositories, region, current_aws_account_id, 
update_tag) repo_images_list = transform_ecr_repository_images(image_data) load_ecr_repository_images(neo4j_session, repo_images_list, region, update_tag) diff --git a/cartography/util.py b/cartography/util.py index 755223f57..2c83561e1 100644 --- a/cartography/util.py +++ b/cartography/util.py @@ -146,7 +146,7 @@ def load_resource_binary(package: str, resource_name: str) -> BinaryIO: R = TypeVar('R') -F = TypeVar('F', bound=Callable[..., R]) +F = TypeVar('F', bound=Callable[..., Any]) def timeit(method: F) -> F: @@ -157,12 +157,12 @@ def timeit(method: F) -> F: """ # Allow access via `inspect` to the wrapped function. This is used in integration tests to standardize param names. @wraps(method) - def timed(*args, **kwargs) -> R: # type: ignore + def timed(*args, **kwargs): # type: ignore stats_client = get_stats_client(method.__module__) if stats_client.is_enabled(): timer = stats_client.timer(method.__name__) timer.start() - result: R = method(*args, **kwargs) + result = method(*args, **kwargs) timer.stop() return result else: @@ -304,7 +304,7 @@ def batch(items: Iterable, size: int = DEFAULT_BATCH_SIZE) -> List[List]: ] -def to_async(func: F, *args: Any, **kwargs: Any) -> Awaitable[R]: +def to_async(func: Callable[..., R], *args: Any, **kwargs: Any) -> Awaitable[R]: ''' Returns a Future that will run a function in the default threadpool. 
Helper until we start using pytohn 3.9's asyncio.to_thread @@ -323,7 +323,7 @@ def to_async(func: F, *args: Any, **kwargs: Any) -> Awaitable[R]: throttling_error_codes = ['LimitExceededException', 'Throttling'] @wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> Any: + def wrapper(*args: Any, **kwargs: Any) -> R: try: return func(*args, **kwargs) except botocore.exceptions.ClientError as error: diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 000000000..13a5aef94 --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,66 @@ +import logging +from unittest.mock import Mock + +import boto3 +import pytest +from botocore.stub import Stubber + +from tests.data.aws.ecr import DESCRIBE_REPOSITORIES +from tests.data.aws.ecr import LIST_REPOSITORY_IMAGES + + +logging.basicConfig(level=logging.INFO) +logging.getLogger('botocore').setLevel(logging.WARNING) + + +DEFAULT_REGION = 'us-east-1' + + +@pytest.fixture +def boto3_session(): + ''' + Create a mock boto3 session that returns stubbed clients + ''' + stubbed_clients = { + DEFAULT_REGION: { + service: make_stubbed_client(service, DEFAULT_REGION) + for service in [ + 'ecr', + ] + }, + } + mock_boto3_session = Mock( + client=Mock( + side_effect=lambda service, region_name: stubbed_clients[region_name][service], + ), + ) + yield mock_boto3_session + + +def make_stubbed_client(service: str, region_name: str = DEFAULT_REGION): + ''' + Create a boto3 client with stubbed responses + ''' + client = boto3.client(service, region_name=region_name) + stubber_funcs = { + 'ecr': stub_ecr, + } + stubber_funcs[service](client) + return client + + +def stub_ecr(client: boto3.client) -> Stubber: + ''' + Handle the stubbing of an ecr client + ''' + stubber = Stubber(client) + stubber.add_response("describe_repositories", DESCRIBE_REPOSITORIES) + for repo_arn, image_list in LIST_REPOSITORY_IMAGES.items(): + repo_name = repo_arn.split("/", maxsplit=1)[-1] + stubber.add_response( + "list_images", + 
{"imageIds": image_list}, + {"repositoryName": repo_name}, + ) + stubber.activate() + return stubber From 4c89a3571d5cf88d6582372eccca21a1f59df052 Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Wed, 2 Aug 2023 15:18:48 -0400 Subject: [PATCH 14/24] cleanup --- cartography/intel/aws/ecr.py | 3 -- tests/unit/conftest.py | 66 ------------------------------------ 2 files changed, 69 deletions(-) delete mode 100644 tests/unit/conftest.py diff --git a/cartography/intel/aws/ecr.py b/cartography/intel/aws/ecr.py index 9aa9ab8b2..0a1e1d133 100644 --- a/cartography/intel/aws/ecr.py +++ b/cartography/intel/aws/ecr.py @@ -141,8 +141,6 @@ def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None: logger.debug("Running ECR cleanup job.") run_cleanup_job('aws_import_ecr_cleanup.json', neo4j_session, common_job_parameters) -# get the image_data given a list of repositores - def _get_image_data(boto3_session: boto3.session.Session, region: str, repositories: List[Dict]) -> Dict: ''' @@ -150,7 +148,6 @@ def _get_image_data(boto3_session: boto3.session.Session, region: str, repositor return as a mapping from repositoryUri to image object ''' image_data = {} - repositories = get_ecr_repositories(boto3_session, region) async def async_get_images(repo: Dict[str, Any]) -> None: repo_image_obj = await to_async(get_ecr_repository_images, boto3_session, region, repo['repositoryName']) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py deleted file mode 100644 index 13a5aef94..000000000 --- a/tests/unit/conftest.py +++ /dev/null @@ -1,66 +0,0 @@ -import logging -from unittest.mock import Mock - -import boto3 -import pytest -from botocore.stub import Stubber - -from tests.data.aws.ecr import DESCRIBE_REPOSITORIES -from tests.data.aws.ecr import LIST_REPOSITORY_IMAGES - - -logging.basicConfig(level=logging.INFO) -logging.getLogger('botocore').setLevel(logging.WARNING) - - -DEFAULT_REGION = 'us-east-1' - - 
-@pytest.fixture -def boto3_session(): - ''' - Create a mock boto3 session that returns stubbed clients - ''' - stubbed_clients = { - DEFAULT_REGION: { - service: make_stubbed_client(service, DEFAULT_REGION) - for service in [ - 'ecr', - ] - }, - } - mock_boto3_session = Mock( - client=Mock( - side_effect=lambda service, region_name: stubbed_clients[region_name][service], - ), - ) - yield mock_boto3_session - - -def make_stubbed_client(service: str, region_name: str = DEFAULT_REGION): - ''' - Create a boto3 client with stubbed responses - ''' - client = boto3.client(service, region_name=region_name) - stubber_funcs = { - 'ecr': stub_ecr, - } - stubber_funcs[service](client) - return client - - -def stub_ecr(client: boto3.client) -> Stubber: - ''' - Handle the stubbing of an ecr client - ''' - stubber = Stubber(client) - stubber.add_response("describe_repositories", DESCRIBE_REPOSITORIES) - for repo_arn, image_list in LIST_REPOSITORY_IMAGES.items(): - repo_name = repo_arn.split("/", maxsplit=1)[-1] - stubber.add_response( - "list_images", - {"imageIds": image_list}, - {"repositoryName": repo_name}, - ) - stubber.activate() - return stubber From c8273f1ae6598d7157317ae004904ba8c44e51ce Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Thu, 3 Aug 2023 14:52:52 -0400 Subject: [PATCH 15/24] rename --- cartography/intel/aws/ecr.py | 8 ++++---- cartography/intel/aws/s3.py | 16 ++++++++-------- cartography/util.py | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cartography/intel/aws/ecr.py b/cartography/intel/aws/ecr.py index 0a1e1d133..a1efc2559 100644 --- a/cartography/intel/aws/ecr.py +++ b/cartography/intel/aws/ecr.py @@ -10,8 +10,8 @@ from cartography.util import batch from cartography.util import run_cleanup_job from cartography.util import timeit -from cartography.util import to_async -from cartography.util import to_sync +from cartography.util import to_asynchronous +from 
cartography.util import to_synchronous logger = logging.getLogger(__name__) @@ -150,9 +150,9 @@ def _get_image_data(boto3_session: boto3.session.Session, region: str, repositor image_data = {} async def async_get_images(repo: Dict[str, Any]) -> None: - repo_image_obj = await to_async(get_ecr_repository_images, boto3_session, region, repo['repositoryName']) + repo_image_obj = await to_asynchronous(get_ecr_repository_images, boto3_session, region, repo['repositoryName']) image_data[repo['repositoryUri']] = repo_image_obj - to_sync(*[async_get_images(repo) for repo in repositories]) + to_synchronous(*[async_get_images(repo) for repo in repositories]) return image_data diff --git a/cartography/intel/aws/s3.py b/cartography/intel/aws/s3.py index d19da8032..6db11cc9f 100644 --- a/cartography/intel/aws/s3.py +++ b/cartography/intel/aws/s3.py @@ -21,8 +21,8 @@ from cartography.util import run_analysis_job from cartography.util import run_cleanup_job from cartography.util import timeit -from cartography.util import to_async -from cartography.util import to_sync +from cartography.util import to_asynchronous +from cartography.util import to_synchronous logger = logging.getLogger(__name__) stat_handler = get_stats_client(__name__) @@ -75,15 +75,15 @@ async def _get_bucket_detail(bucket: Dict[str, Any]) -> BucketDetail: versioning, public_access_block, ) = await asyncio.gather( - to_async(get_acl, bucket, client), - to_async(get_policy, bucket, client), - to_async(get_encryption, bucket, client), - to_async(get_versioning, bucket, client), - to_async(get_public_access_block, bucket, client), + to_asynchronous(get_acl, bucket, client), + to_asynchronous(get_policy, bucket, client), + to_asynchronous(get_encryption, bucket, client), + to_asynchronous(get_versioning, bucket, client), + to_asynchronous(get_public_access_block, bucket, client), ) return bucket['Name'], acl, policy, encryption, versioning, public_access_block - bucket_details = to_sync(*[_get_bucket_detail(bucket) 
for bucket in bucket_data['Buckets']]) + bucket_details = to_synchronous(*[_get_bucket_detail(bucket) for bucket in bucket_data['Buckets']]) yield from bucket_details diff --git a/cartography/util.py b/cartography/util.py index 2c83561e1..8dacf6233 100644 --- a/cartography/util.py +++ b/cartography/util.py @@ -304,7 +304,7 @@ def batch(items: Iterable, size: int = DEFAULT_BATCH_SIZE) -> List[List]: ] -def to_async(func: Callable[..., R], *args: Any, **kwargs: Any) -> Awaitable[R]: +def to_asynchronous(func: Callable[..., R], *args: Any, **kwargs: Any) -> Awaitable[R]: ''' Returns a Future that will run a function in the default threadpool. Helper until we start using pytohn 3.9's asyncio.to_thread @@ -337,7 +337,7 @@ def wrapper(*args: Any, **kwargs: Any) -> R: return asyncio.get_event_loop().run_in_executor(None, call) -def to_sync(*awaitables: Awaitable[Any]) -> Any: +def to_synchronous(*awaitables: Awaitable[Any]) -> Any: ''' Waits for the Awaitable(s) to complete and returns their result(s). 
See https://docs.python.org/3.8/library/asyncio-task.html#asyncio-awaitables From f8eadd0f4dfdafb01c479214c74728084e0bd43a Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Thu, 3 Aug 2023 16:25:51 -0400 Subject: [PATCH 16/24] Update setup.py --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 8b2b7ddfb..bbfdb7953 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,6 @@ from setuptools import find_packages from setuptools import setup - __version__ = '0.82.0' From a92b20ebdae29d6a7c77d73f1dd01afc184441d4 Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Tue, 8 Aug 2023 12:37:34 -0400 Subject: [PATCH 17/24] refactor, add examples --- cartography/util.py | 62 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 11 deletions(-) diff --git a/cartography/util.py b/cartography/util.py index 8dacf6233..7ca494cc5 100644 --- a/cartography/util.py +++ b/cartography/util.py @@ -304,31 +304,58 @@ def batch(items: Iterable, size: int = DEFAULT_BATCH_SIZE) -> List[List]: ] +def is_throttling_exception(exc: Exception) -> bool: + ''' + Returns True if the exception is caused by a client libraries throttling mechanism + ''' + # https://boto3.amazonaws.com/v1/documentation/api/1.19.9/guide/error-handling.html + if isinstance(exc, botocore.exceptions.ClientError): + if exc.response['Error']['Code'] in ['LimitExceededException', 'Throttling']: + return True + # add other exceptions here, if needed, like: + # https://cloud.google.com/python/docs/reference/storage/1.39.0/retry_timeout#configuring-retries + # if isinstance(exc, google.api_core.exceptions.TooManyRequests): + # return True + return False + + def to_asynchronous(func: Callable[..., R], *args: Any, **kwargs: Any) -> Awaitable[R]: ''' - Returns a Future that will run a function in the default threadpool. 
- Helper until we start using pytohn 3.9's asyncio.to_thread + Returns a Future that will run a function and its arguments in the default threadpool. + Helper until we start using python 3.9's asyncio.to_thread Calls are also wrapped within a backoff decorator to handle throttling errors. example: - future = to_async(my_func, my_arg, my_arg2) - to_sync(future) + def my_func(arg1, arg2, kwarg1): + return arg1 + arg2 + kwarg1 + + # normal synchronous call: + result = my_func(1, 2, kwarg1=3) + + # asynchronous call: + future = to_asynchronous(my_func, 1, 2, kwarg1=3) + + # the result is stored in the future, and can be retrieved + # from within another async function with: + await future + + # or from within a synchronous function with our helper: + to_synchronous(future) NOTE: to use this in a Jupyter notebook, you need to do: # import nest_asyncio # nest_asyncio.apply() ''' CartographyThrottlingException = type('CartographyThrottlingException', (Exception,), {}) - throttling_error_codes = ['LimitExceededException', 'Throttling'] @wraps(func) def wrapper(*args: Any, **kwargs: Any) -> R: try: return func(*args, **kwargs) - except botocore.exceptions.ClientError as error: - if error.response['Error']['Code'] in throttling_error_codes: - raise CartographyThrottlingException from error + except Exception as exc: + if is_throttling_exception(exc): + raise CartographyThrottlingException from exc raise # don't use @backoff as decorator, to preserve typing @@ -337,12 +364,25 @@ def wrapper(*args: Any, **kwargs: Any) -> R: return asyncio.get_event_loop().run_in_executor(None, call) -def to_synchronous(*awaitables: Awaitable[Any]) -> Any: +def to_synchronous(*awaitables: Awaitable[Any]) -> List[Any]: ''' - Waits for the Awaitable(s) to complete and returns their result(s). + Synchronously waits for the Awaitable(s) to complete and returns their result(s). 
See https://docs.python.org/3.8/library/asyncio-task.html#asyncio-awaitables example: - result = to_sync(my_async_func(my_arg), another_async(my_arg2))) + async def my_async_func(my_arg): + return my_arg + + async def another_async_func(my_arg2): + return my_arg2 + + remember that an invocation of an async function returns a Future (Awaitable), + which needs to be awaited to get the result. You cannot await a Future from within + a non-async function, so you could use this helper to get the result from a Future + + future_1 = my_async_func(1) + future_2 = another_async_func(2) + + results = to_synchronous(future_1, future_2) ''' return asyncio.get_event_loop().run_until_complete(asyncio.gather(*awaitables)) From 92c72ad30268315c5172f2f1dacab48ed2f80670 Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Tue, 8 Aug 2023 12:40:21 -0400 Subject: [PATCH 18/24] typing --- cartography/intel/aws/ecr.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cartography/intel/aws/ecr.py b/cartography/intel/aws/ecr.py index a1efc2559..af51075bd 100644 --- a/cartography/intel/aws/ecr.py +++ b/cartography/intel/aws/ecr.py @@ -142,7 +142,11 @@ def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None: run_cleanup_job('aws_import_ecr_cleanup.json', neo4j_session, common_job_parameters) -def _get_image_data(boto3_session: boto3.session.Session, region: str, repositories: List[Dict]) -> Dict: +def _get_image_data( + boto3_session: boto3.session.Session, + region: str, + repositories: List[Dict[str, Any]], +) -> Dict[str, Any]: ''' Given a list of repositories, get the image data for each repository, return as a mapping from repositoryUri to image object From bdc664f4a7a5c8beb5f1a920a6edc8a0b334276f Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Tue, 8 Aug 2023 12:41:31 -0400 Subject: [PATCH 19/24] whitespace --- 
cartography/intel/aws/ecr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cartography/intel/aws/ecr.py b/cartography/intel/aws/ecr.py index af51075bd..4569024d5 100644 --- a/cartography/intel/aws/ecr.py +++ b/cartography/intel/aws/ecr.py @@ -149,7 +149,7 @@ def _get_image_data( ) -> Dict[str, Any]: ''' Given a list of repositories, get the image data for each repository, - return as a mapping from repositoryUri to image object + return as a mapping from repositoryUri to image object ''' image_data = {} From 4ac4dfa7247ce73ae06b4187b707cb16c57c241f Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Tue, 22 Aug 2023 17:15:24 -0400 Subject: [PATCH 20/24] add :param : docs --- cartography/util.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cartography/util.py b/cartography/util.py index 7ca494cc5..2ad072f8f 100644 --- a/cartography/util.py +++ b/cartography/util.py @@ -326,6 +326,10 @@ def to_asynchronous(func: Callable[..., R], *args: Any, **kwargs: Any) -> Awaita Calls are also wrapped within a backoff decorator to handle throttling errors. + :param func: the function to be wrapped by the Future + :param args: a series of argumnets to be passed into func + :param kwards: a series of keyword arguments to be passed into func + example: def my_func(arg1, arg2, kwarg1): return arg1 + arg2 + kwarg1 @@ -369,6 +373,8 @@ def to_synchronous(*awaitables: Awaitable[Any]) -> List[Any]: Synchronously waits for the Awaitable(s) to complete and returns their result(s). See https://docs.python.org/3.8/library/asyncio-task.html#asyncio-awaitables + :param awaitables: a series of Awaitable objects, with each object being its own parameter. 
i.e., not a single list of Awaitables + example: async def my_async_func(my_arg): return my_arg From 128ef7ac4bce4c8ed7dfbe75fc8c462d5908e2f4 Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Tue, 22 Aug 2023 17:16:47 -0400 Subject: [PATCH 21/24] Update util.py --- cartography/util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cartography/util.py b/cartography/util.py index 2ad072f8f..4fdc25674 100644 --- a/cartography/util.py +++ b/cartography/util.py @@ -327,8 +327,8 @@ def to_asynchronous(func: Callable[..., R], *args: Any, **kwargs: Any) -> Awaita Calls are also wrapped within a backoff decorator to handle throttling errors. :param func: the function to be wrapped by the Future - :param args: a series of argumnets to be passed into func - :param kwards: a series of keyword arguments to be passed into func + :param args: a series of arguments to be passed into func + :param kwargs: a series of keyword arguments to be passed into func example: def my_func(arg1, arg2, kwarg1): @@ -373,7 +373,7 @@ def to_synchronous(*awaitables: Awaitable[Any]) -> List[Any]: Synchronously waits for the Awaitable(s) to complete and returns their result(s). See https://docs.python.org/3.8/library/asyncio-task.html#asyncio-awaitables - :param awaitables: a series of Awaitable objects, with each object being its own parameter. i.e., not a single list of Awaitables + :param awaitables: a series of Awaitable objects, with each object being its own argument. 
i.e., not a single list of Awaitables example: async def my_async_func(my_arg): From 8f770f719acfdfe8f22bca87808f388bf11d2319 Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Tue, 22 Aug 2023 17:38:35 -0400 Subject: [PATCH 22/24] lint --- cartography/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cartography/util.py b/cartography/util.py index 4fdc25674..2aa0392be 100644 --- a/cartography/util.py +++ b/cartography/util.py @@ -329,7 +329,7 @@ def to_asynchronous(func: Callable[..., R], *args: Any, **kwargs: Any) -> Awaita :param func: the function to be wrapped by the Future :param args: a series of arguments to be passed into func :param kwargs: a series of keyword arguments to be passed into func - + example: def my_func(arg1, arg2, kwarg1): return arg1 + arg2 + kwarg1 From 1ad7a8fd222845f820a842d0d9cfec8bde53a063 Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Tue, 22 Aug 2023 17:40:18 -0400 Subject: [PATCH 23/24] lint --- .../intel/aws/test_permission_relationships.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/cartography/intel/aws/test_permission_relationships.py b/tests/unit/cartography/intel/aws/test_permission_relationships.py index b19604256..bc85bdd0e 100644 --- a/tests/unit/cartography/intel/aws/test_permission_relationships.py +++ b/tests/unit/cartography/intel/aws/test_permission_relationships.py @@ -36,7 +36,7 @@ def test_not_action_statement(): "action": [ "*", ], - "notaction":[ + "notaction": [ "S3:GetObject", ], "resource": [ @@ -209,7 +209,7 @@ def test_non_matching_notresource(): "action": [ "s3:Get*", ], - "resource":["*"], + "resource": ["*"], "notresource": [ "arn:aws:s3:::nottest", ], @@ -417,7 +417,7 @@ def test_single_comma(): "action": [ "s3:?et*", ], - "resource":["arn:aws:s3:::testbucke?"], + "resource": ["arn:aws:s3:::testbucke?"], "effect": "Allow", }, ] @@ 
-432,7 +432,7 @@ def test_multiple_comma(): "action": [ "s3:?et*", ], - "resource":["arn:aws:s3:::????bucket"], + "resource": ["arn:aws:s3:::????bucket"], "effect": "Allow", }, ] From 4edaefb6a783e7215ac23d837b8267cbd9ad3409 Mon Sep 17 00:00:00 2001 From: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> Date: Tue, 22 Aug 2023 17:42:54 -0400 Subject: [PATCH 24/24] lint --- cartography/util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cartography/util.py b/cartography/util.py index 2aa0392be..30b9bccd8 100644 --- a/cartography/util.py +++ b/cartography/util.py @@ -373,7 +373,8 @@ def to_synchronous(*awaitables: Awaitable[Any]) -> List[Any]: Synchronously waits for the Awaitable(s) to complete and returns their result(s). See https://docs.python.org/3.8/library/asyncio-task.html#asyncio-awaitables - :param awaitables: a series of Awaitable objects, with each object being its own argument. i.e., not a single list of Awaitables + :param awaitables: a series of Awaitable objects, with each object being its own argument. + i.e., not a single list of Awaitables example: async def my_async_func(my_arg):