Skip to content

Commit

Permalink
Added quick version checks to risky cluster-* and config-* cmds
Browse files Browse the repository at this point in the history
Signed-off-by: Chris Helma <[email protected]>
  • Loading branch information
chelma committed Jan 23, 2024
1 parent 6c70ed4 commit 2e46eee
Show file tree
Hide file tree
Showing 12 changed files with 353 additions and 49 deletions.
16 changes: 12 additions & 4 deletions manage_arkime/commands/cluster_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
S3Plan, DEFAULT_S3_STORAGE_CLASS, DEFAULT_S3_STORAGE_DAYS, DEFAULT_HISTORY_DAYS,
CaptureNodesPlan, ViewerNodesPlan, DataNodesPlan, EcsSysResourcePlan, MasterNodesPlan, OSDomainPlan,
get_viewer_vpc_plan)
from core.versioning import get_version_info
import core.versioning as ver
from core.user_config import UserConfig

logger = logging.getLogger(__name__)
Expand All @@ -38,14 +38,22 @@ def cmd_cluster_create(profile: str, region: str, name: str, expected_traffic: f
aws_env = aws_provider.get_aws_env()
cdk_client = CdkClient(aws_env)

# Confirm the CLI and Cluster versions are compatible
is_initial_invocation = _is_initial_invocation(name, aws_provider)
if not is_initial_invocation:
try:
ver.confirm_aws_aio_version_compatibility(name, aws_provider)
except (ver.CliClusterVersionMismatch, ver.CaptureViewerVersionMismatch, ver.UnableToRetrieveClusterVersion) as e:
logger.error(e)
logger.warning("Aborting...")
return

# Generate our capacity plan, then confirm it's what the user expected and it's safe to proceed with the operation
previous_user_config = _get_previous_user_config(name, aws_provider)
next_user_config = _get_next_user_config(name, expected_traffic, spi_days, history_days, replicas, pcap_days, aws_provider)
previous_capacity_plan = _get_previous_capacity_plan(name, aws_provider)
next_capacity_plan = _get_next_capacity_plan(next_user_config, previous_capacity_plan, capture_cidr, viewer_cidr, aws_provider)

is_initial_invocation = _is_initial_invocation(name, aws_provider)

if not _should_proceed_with_operation(is_initial_invocation, previous_capacity_plan, next_capacity_plan, previous_user_config,
next_user_config, preconfirm_usage, capture_cidr, viewer_cidr):
return
Expand Down Expand Up @@ -250,7 +258,7 @@ def _upload_arkime_config_if_necessary(cluster_name: str, bucket_name: str, s3_k
# Generate its metadata
next_metadata = config_wrangling.ConfigDetails(
s3=config_wrangling.S3Details(bucket_name, s3_key),
version=get_version_info(archive)
version=ver.get_version_info(archive)
)

# Upload the archive to S3
Expand Down
8 changes: 8 additions & 0 deletions manage_arkime/commands/cluster_deregister_vpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import aws_interactions.ssm_operations as ssm_ops
import core.constants as constants
from core.cross_account_wrangling import CrossAccountAssociation, remove_vpce_permissions
import core.versioning as ver

logger = logging.getLogger(__name__)

Expand All @@ -15,6 +16,13 @@ def cmd_cluster_deregister_vpc(profile: str, region: str, cluster_name: str, vpc
logger.info("Deregistering the VPC with the Cluster...")
aws_provider = AwsClientProvider(aws_profile=profile, aws_region=region)

try:
ver.confirm_aws_aio_version_compatibility(cluster_name, aws_provider)
except (ver.CliClusterVersionMismatch, ver.CaptureViewerVersionMismatch, ver.UnableToRetrieveClusterVersion) as e:
logger.error(e)
logger.warning("Aborting...")
return

# Confirm the cross-account link exists
try:
ssm_param_name = constants.get_cluster_vpc_cross_account_ssm_param_name(cluster_name, vpc_id)
Expand Down
12 changes: 8 additions & 4 deletions manage_arkime/commands/cluster_destroy.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from cdk_interactions.cdk_client import CdkClient
from core.capacity_planning import ClusterPlan
import core.constants as constants
import core.versioning as ver
import cdk_interactions.cdk_context as context

logger = logging.getLogger(__name__)
Expand All @@ -25,12 +26,15 @@ def cmd_cluster_destroy(profile: str, region: str, name: str, destroy_everything
cdk_client = CdkClient(aws_provider.get_aws_env())

try:
cluster_plan_str = get_ssm_param_json_value(constants.get_cluster_ssm_param_name(name), "capacityPlan", aws_provider)
cluster_plan = ClusterPlan.from_dict(cluster_plan_str)
except ParamDoesNotExist:
logger.warning(f"The Cluster {name} does not appear to exist; aborting...")
ver.confirm_aws_aio_version_compatibility(name, aws_provider)
except (ver.CliClusterVersionMismatch, ver.CaptureViewerVersionMismatch, ver.UnableToRetrieveClusterVersion) as e:
logger.error(e)
logger.warning("Aborting...")
return

cluster_plan_str = get_ssm_param_json_value(constants.get_cluster_ssm_param_name(name), "capacityPlan", aws_provider)
cluster_plan = ClusterPlan.from_dict(cluster_plan_str)

vpcs_search_path = f"{constants.get_cluster_ssm_param_name(name)}/vpcs"
monitored_vpcs = get_ssm_names_by_path(vpcs_search_path, aws_provider)
if monitored_vpcs:
Expand Down
18 changes: 10 additions & 8 deletions manage_arkime/commands/cluster_register_vpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import aws_interactions.ssm_operations as ssm_ops
import core.constants as constants
from core.cross_account_wrangling import CrossAccountAssociation, ensure_cross_account_role_exists, add_vpce_permissions
import core.versioning as ver

logger = logging.getLogger(__name__)

Expand All @@ -15,18 +16,19 @@ def cmd_cluster_register_vpc(profile: str, region: str, cluster_name: str, vpc_a
aws_provider = AwsClientProvider(aws_profile=profile, aws_region=region)
aws_env = aws_provider.get_aws_env()

# Confirm the cluster exists
try:
vpce_service_id = ssm_ops.get_ssm_param_json_value(
constants.get_cluster_ssm_param_name(cluster_name),
"vpceServiceId",
aws_provider
)
except ssm_ops.ParamDoesNotExist:
logger.error(f"The cluster {cluster_name} does not exist; try using the clusters-list command to see the clusters you have created.")
ver.confirm_aws_aio_version_compatibility(cluster_name, aws_provider)
except (ver.CliClusterVersionMismatch, ver.CaptureViewerVersionMismatch, ver.UnableToRetrieveClusterVersion) as e:
logger.error(e)
logger.warning("Aborting...")
return

vpce_service_id = ssm_ops.get_ssm_param_json_value(
constants.get_cluster_ssm_param_name(cluster_name),
"vpceServiceId",
aws_provider
)

# Create the cross account IAM role for the VPC account to access the Cluster account
role_name = ensure_cross_account_role_exists(cluster_name, vpc_account_id, vpc_id, aws_provider, aws_env)

Expand Down
21 changes: 15 additions & 6 deletions manage_arkime/commands/config_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import aws_interactions.ssm_operations as ssm_ops
import core.constants as constants
from core.local_file import LocalFile, S3File
from core.versioning import get_version_info
import core.versioning as ver

logger = logging.getLogger(__name__)

Expand All @@ -23,13 +23,22 @@ def cmd_config_update(profile: str, region: str, cluster_name: str, capture: boo
no_component_specified = not (capture or viewer)
if config_version and (not one_component_specified):
logger.error("If you specify a specific config version to deploy, you must indicate whether to deploy it to"
+ " either the Capture or Viewer nodes. Aborting...")
+ " either the Capture or Viewer nodes.")
logger.warning("Aborting...")
exit(1)

# Update Capture/Viewer config in the cloud, if there's a new version locally. Bounce the associated ECS Tasks
# if we updated the configuration so that they pick it up.
aws_provider = AwsClientProvider(aws_profile=profile, aws_region=region)
aws_env = aws_provider.get_aws_env()

try:
ver.confirm_aws_aio_version_compatibility(cluster_name, aws_provider)
except (ver.CliClusterVersionMismatch, ver.CaptureViewerVersionMismatch, ver.UnableToRetrieveClusterVersion) as e:
logger.error(e)
logger.warning("Aborting...")
return

# Update Capture/Viewer config in the cloud, if there's a new version locally. Bounce the associated ECS Tasks
# if we updated the configuration so that they pick it up.
bucket_name = constants.get_config_bucket_name(aws_env.aws_account, aws_env.aws_region, cluster_name)

logger.info("Updating Arkime config for Capture Nodes, if necessary...")
Expand Down Expand Up @@ -92,7 +101,7 @@ def _update_config_if_necessary(cluster_name: str, bucket_name: str, s3_key_prov
# Create the local config archive and its metadata
aws_env = aws_provider.get_aws_env()
archive = archive_provider(cluster_name, aws_env)
archive_md5 = get_version_info(archive).md5_version
archive_md5 = ver.get_version_info(archive).md5_version

# Confirm the requested version exists, if specified
if switch_to_version:
Expand Down Expand Up @@ -149,7 +158,7 @@ def _update_config_if_necessary(cluster_name: str, bucket_name: str, s3_key_prov
if switch_to_version
else config_wrangling.ConfigDetails(
s3=config_wrangling.S3Details(bucket_name, s3_key_provider(next_config_version)),
version=get_version_info(archive, config_version=next_config_version),
version=ver.get_version_info(archive, config_version=next_config_version),
previous=cloud_config_details
)
)
Expand Down
57 changes: 57 additions & 0 deletions manage_arkime/core/versioning.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
from dataclasses import dataclass
from datetime import datetime, timezone
import hashlib
import json
import logging
from typing import Dict

import arkime_interactions.config_wrangling as config_wrangling
from aws_interactions.aws_client_provider import AwsClientProvider
import aws_interactions.ssm_operations as ssm_ops
import core.constants as constants
from core.local_file import LocalFile
from core.shell_interactions import call_shell_command

logger = logging.getLogger(__name__)

"""
Manually updated/managed version number. Increment if/when a backwards incompatible change is made.
"""
Expand Down Expand Up @@ -74,4 +82,53 @@ def get_version_info(config_file: LocalFile, config_version: str = None) -> Vers
datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
)

class UnableToRetrieveClusterVersion(Exception):
def __init__(self, cluster_name: str, cli_version: int):
super().__init__(f"It appears the cluster {cluster_name} does not exist. There's also a chance the AWS AIO version"
+ f" of the CLI ({cli_version}) is incompatible with your Cluster. If you're confident the Cluster"
+ " exists, you can try checking the AWS AIO version of your cluster using the clusters-list"
+ " command. The CLI and Cluster versions must match.")

class CaptureViewerVersionMismatch(Exception):
def __init__(self, capture_version: int, viewer_version: int):
super().__init__(f"The AWS AIO versions of your Capture ({capture_version}) and Viewer ({viewer_version}) components"
+ " do not match. This is unexpected and should not happen. Please cut us a ticket at:"
+ " https://github.com/arkime/aws-aio/issues/new")

class CliClusterVersionMismatch(Exception):
def __init__(self, cli_version: int, cluster_version: int):
super().__init__(f"The AWS AIO versions of your CLI ({cli_version}) and Cluster ({cluster_version}) do not"
+ " match. This is likely to result in unexpected behavior. Please revert your CLI to the latest"
+ f" minor version under the major version ({cluster_version}). You can see a version listing of"
+ " the CLI using the command: git ls-remote --tags [email protected]:arkime/aws-aio.git")

def confirm_aws_aio_version_compatibility(cluster_name: str, aws_provider: AwsClientProvider, cli_version: int = AWS_AIO_VERSION):
# Unfortunately, it appears currently impossible to distinguish between the scenarios where the cluster doesn't
# exist and the cluster exists but is a different version. In either case, we could get the ParamDoesNotExist
# exception.
try:
raw_capture_details_val = ssm_ops.get_ssm_param_value(
constants.get_capture_config_details_ssm_param_name(cluster_name),
aws_provider
)
capture_config_details = config_wrangling.ConfigDetails.from_dict(json.loads(raw_capture_details_val))

raw_viewer_details_val = ssm_ops.get_ssm_param_value(
constants.get_viewer_config_details_ssm_param_name(cluster_name),
aws_provider
)
viewer_config_details = config_wrangling.ConfigDetails.from_dict(json.loads(raw_viewer_details_val))
except ssm_ops.ParamDoesNotExist:
raise UnableToRetrieveClusterVersion(cluster_name, cli_version)

capture_version = int(capture_config_details.version.aws_aio_version)
viewer_version = int(viewer_config_details.version.aws_aio_version)

if capture_version != viewer_version:
raise CaptureViewerVersionMismatch(capture_version, viewer_version)

if capture_version != cli_version:
raise CliClusterVersionMismatch(cli_version, capture_version)

# Everything matches, we're good to go
return
63 changes: 60 additions & 3 deletions test_manage_arkime/commands/test_cluster_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
DEFAULT_VIEWER_PUBLIC_MASK)
import core.local_file as local_file
from core.user_config import UserConfig
from core.versioning import VersionInfo
from core.versioning import VersionInfo, CliClusterVersionMismatch


@mock.patch("commands.cluster_create.AwsClientProvider")
Expand Down Expand Up @@ -109,6 +109,63 @@ def test_WHEN_cmd_cluster_create_called_THEN_cdk_command_correct(mock_cdk_client
]
assert expected_tag_calls == mock_tag.call_args_list

@mock.patch("commands.cluster_create.AwsClientProvider", mock.Mock())
@mock.patch("commands.cluster_create.ver.confirm_aws_aio_version_compatibility")
@mock.patch("commands.cluster_create._is_initial_invocation")
@mock.patch("commands.cluster_create._tag_domain")
@mock.patch("commands.cluster_create._set_up_arkime_config")
@mock.patch("commands.cluster_create._configure_ism")
@mock.patch("commands.cluster_create._get_previous_user_config")
@mock.patch("commands.cluster_create._get_previous_capacity_plan")
@mock.patch("commands.cluster_create._should_proceed_with_operation")
@mock.patch("commands.cluster_create._get_next_user_config")
@mock.patch("commands.cluster_create._get_next_capacity_plan")
@mock.patch("commands.cluster_create._set_up_viewer_cert")
@mock.patch("commands.cluster_create.CdkClient")
def test_WHEN_cmd_cluster_create_called_AND_ver_mismatch_THEN_as_expected(mock_cdk_client_cls, mock_set_up, mock_get_plans, mock_get_config,
mock_proceed, mock_get_prev_plan, mock_get_prev_config, mock_configure,
mock_set_up_arkime_conf, mock_tag, mock_initial, mock_confirm_ver):
# Set up our mock
mock_set_up.return_value = "arn"

mock_client = mock.Mock()
mock_cdk_client_cls.return_value = mock_client

user_config = mock.Mock()
mock_get_config.return_value = user_config
mock_get_prev_config.return_value = user_config

cluster_plan = mock.Mock()
mock_get_plans.return_value = cluster_plan
mock_get_prev_plan.return_value = cluster_plan

mock_initial.return_value = False
mock_proceed.return_value = True

mock_confirm_ver.side_effect = CliClusterVersionMismatch(2, 1)

# Run our test
cmd_cluster_create("profile", "region", "my-cluster", None, None, None, None, None, True, False, "1.2.3.4/24", "2.3.4.5/26")

# Check our results
expected_proceed_calls = []
assert expected_proceed_calls == mock_proceed.call_args_list

expected_calls = []
assert expected_calls == mock_client.deploy.call_args_list

expected_set_up_calls = []
assert expected_set_up_calls == mock_set_up.call_args_list

expected_configure_calls = []
assert expected_configure_calls == mock_configure.call_args_list

expected_tag_calls = []
assert expected_tag_calls == mock_tag.call_args_list

expected_set_up_arkime_conf_calls = []
assert expected_set_up_arkime_conf_calls == mock_set_up_arkime_conf.call_args_list

@mock.patch("commands.cluster_create.AwsClientProvider", mock.Mock())
@mock.patch("commands.cluster_create._is_initial_invocation")
@mock.patch("commands.cluster_create._tag_domain")
Expand Down Expand Up @@ -898,7 +955,7 @@ def test_WHEN_configure_ism_called_THEN_as_expected(mock_events, mock_ssm):

@mock.patch("commands.cluster_create.ssm_ops.get_ssm_param_value")
@mock.patch("commands.cluster_create.ssm_ops.put_ssm_param")
@mock.patch("commands.cluster_create.get_version_info")
@mock.patch("commands.cluster_create.ver.get_version_info")
@mock.patch("commands.cluster_create.config_wrangling.get_viewer_config_archive")
@mock.patch("commands.cluster_create.config_wrangling.get_capture_config_archive")
@mock.patch("commands.cluster_create.s3.put_file_to_bucket")
Expand Down Expand Up @@ -989,7 +1046,7 @@ def test_WHEN_set_up_arkime_config_called_AND_happy_path_THEN_as_expected(mock_s

@mock.patch("commands.cluster_create.ssm_ops.get_ssm_param_value")
@mock.patch("commands.cluster_create.ssm_ops.put_ssm_param")
@mock.patch("commands.cluster_create.get_version_info")
@mock.patch("commands.cluster_create.ver.get_version_info")
@mock.patch("commands.cluster_create.config_wrangling.get_viewer_config_archive")
@mock.patch("commands.cluster_create.config_wrangling.get_capture_config_archive")
@mock.patch("commands.cluster_create.s3.put_file_to_bucket")
Expand Down
Loading

0 comments on commit 2e46eee

Please sign in to comment.