Skip to content

Commit

Permalink
Add support for CloudFront invalidating
Browse files Browse the repository at this point in the history
  • Loading branch information
ligangty committed Mar 25, 2024
1 parent b9cd1b5 commit 80f9840
Show file tree
Hide file tree
Showing 37 changed files with 649 additions and 108 deletions.
8 changes: 8 additions & 0 deletions charon.spec
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,14 @@ export LANG=en_US.UTF-8 LANGUAGE=en_US.en LC_ALL=en_US.UTF-8


%changelog
* Fri Mar 29 2024 Gang Li <[email protected]>
- 1.3.0 release
- Add validate command: validate the checksum for maven artifacts
- Add index command: support to re-index of the speicified folder
- Add CF invalidating features:
- Invalidate generated metadata files (maven-metadata*/package.json/index.html) after product uploading/deleting in CloudFront
- Add command to do CF invalidating and checking

* Mon Sep 18 2023 Harsh Modi <[email protected]>
- 1.2.2 release
- hot fix for "dist_tags" derived issue
Expand Down
134 changes: 134 additions & 0 deletions charon/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
from boto3 import session
from botocore.exceptions import ClientError
from typing import Dict, List
import os
import logging
import uuid

logger = logging.getLogger(__name__)

ENDPOINT_ENV = "aws_endpoint_url"

DEFAULT_BUCKET_TO_DOMAIN = {
"prod-maven-ga": "maven.repository.redhat.com",
"prod-maven-ea": "maven.repository.redhat.com",
"stage-maven-ga": "maven.strage.repository.redhat.com",
"stage-maven-ea": "maven.strage.repository.redhat.com",
"prod-npm": "npm.repository.redhat.com",
"stage-npm": "npm.stage.repository.redhat.com"
}


class CFClient(object):
"""The CFClient is a wrapper of the original boto3 clouldfrong client,
which will provide CloudFront functions to be used in the charon.
"""

def __init__(
self,
aws_profile=None,
extra_conf=None
) -> None:
self.__client = self.__init_aws_client(aws_profile, extra_conf)

def __init_aws_client(
self, aws_profile=None, extra_conf=None
):
if aws_profile:
logger.debug("Using aws profile: %s", aws_profile)
cf_session = session.Session(profile_name=aws_profile)
else:
cf_session = session.Session()
endpoint_url = self.__get_endpoint(extra_conf)
return cf_session.client(
'cloudfront',
endpoint_url=endpoint_url
)

def __get_endpoint(self, extra_conf) -> str:
endpoint_url = os.getenv(ENDPOINT_ENV)
if not endpoint_url or not endpoint_url.strip():
if isinstance(extra_conf, Dict):
endpoint_url = extra_conf.get(ENDPOINT_ENV, None)
if endpoint_url:
logger.info("Using endpoint url for aws client: %s", endpoint_url)
else:
logger.debug("No user-specified endpoint url is used.")
return endpoint_url

def invalidate_paths(self, distr_id: str, paths: List[str]) -> Dict[str, str]:
"""Send a invalidating requests for the paths in distribution to CloudFront.
This will invalidate the paths in the distribution to enforce the refreshment
from backend S3 bucket for these paths. For details see:
https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Invalidation.html
* The distr_id is the id for the distribution. This id can be get through
get_dist_id_by_domain(domain) function
* Can specify the invalidating paths through paths param.
"""
caller_ref = str(uuid.uuid4())
logger.debug("[CloudFront] Creating invalidation for paths: %s", paths)
try:
response = self.__client.create_invalidation(
DistributionId=distr_id,
InvalidationBatch={
'CallerReference': caller_ref,
'Paths': {
'Quantity': len(paths),
'Items': paths
}
}
)
if response:
invalidation = response.get('Invalidation', {})
return {
'Id': invalidation.get('Id', None),
'Status': invalidation.get('Status', None)
}
except Exception as err:
logger.error(
"[CloudFront] Error occurred while creating invalidation, error: %s", err
)

def check_invalidation(self, distr_id: str, invalidation_id: str) -> dict:
try:
response = self.__client.get_invalidation(
DistributionId=distr_id,
Id=invalidation_id
)
if response:
invalidation = response.get('Invalidation', {})
return {
'Id': invalidation.get('Id', None),
'CreateTime': invalidation.get('CreateTime', None),
'Status': invalidation.get('Status', None)
}
except Exception as err:
logger.error(
"[CloudFront] Error occurred while check invalidation of id %s, "
"error: %s", invalidation_id, err
)

def get_dist_id_by_domain(self, domain: str) -> str:
"""Get distribution id by a domain name. The id can be used to send invalidating
request through #invalidate_paths function
* Domain are Ronda domains, like "maven.repository.redhat.com"
or "npm.repository.redhat.com"
"""
try:
response = self.__client.list_distributions()
if response:
dist_list_items = response.get("DistributionList", {}).get("Items", [])
for distr in dist_list_items:
aliases_items = distr.get('Aliases', {}).get('Items', [])
if aliases_items and domain in aliases_items:
return distr['Id']
logger.error("[CloudFront]: Distribution not found for domain %s", domain)
except ClientError as err:
logger.error(
"[CloudFront]: Error occurred while get distribution for domain %s: %s",
domain, err
)
return None

def get_domain_by_bucket(self, bucket: str) -> str:
return DEFAULT_BUCKET_TO_DOMAIN.get(bucket, None)
2 changes: 2 additions & 0 deletions charon/cmd/cmd_delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ def delete(
buckets=buckets,
aws_profile=aws_profile,
dir_=work_dir,
cf_enable=conf.is_aws_cf_enable(),
dry_run=dryrun,
manifest_bucket_name=manifest_bucket_name
)
Expand All @@ -178,6 +179,7 @@ def delete(
buckets=buckets,
aws_profile=aws_profile,
dir_=work_dir,
cf_enable=conf.is_aws_cf_enable(),
dry_run=dryrun,
manifest_bucket_name=manifest_bucket_name
)
Expand Down
45 changes: 21 additions & 24 deletions charon/cmd/cmd_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,34 +87,31 @@ def index(
# log is recorded get_target
sys.exit(1)

aws_bucket = None
prefix = None
for b in conf.get_target(target):
for b in tgt:
aws_bucket = b.get('bucket')
prefix = b.get('prefix', '')

package_type = None
if "maven" in aws_bucket:
logger.info(
"The target is a maven repository. Will refresh the index as maven package type"
)
package_type = PACKAGE_TYPE_MAVEN
elif "npm" in aws_bucket:
package_type = PACKAGE_TYPE_NPM
logger.info(
"The target is a npm repository. Will refresh the index as npm package type"
)
else:
logger.error(
"The target is not supported. Only maven or npm target is supported."
)
sys.exit(1)
package_type = None
if "maven" in aws_bucket:
logger.info(
"The target is a maven repository. Will refresh the index as maven package type"
)
package_type = PACKAGE_TYPE_MAVEN
elif "npm" in aws_bucket:
package_type = PACKAGE_TYPE_NPM
logger.info(
"The target is a npm repository. Will refresh the index as npm package type"
)
else:
logger.error(
"The target %s is not supported. Only maven or npm target is supported.",
target
)

if not aws_bucket:
logger.error("No bucket specified!")
sys.exit(1)
if not aws_bucket:
logger.error("No bucket specified for target %s!", target)
else:
re_index(b, path, package_type, aws_profile, dryrun)

re_index(aws_bucket, prefix, path, package_type, aws_profile, dryrun)
except Exception:
print(traceback.format_exc())
sys.exit(2) # distinguish between exception and bad config or bad state
2 changes: 2 additions & 0 deletions charon/cmd/cmd_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ def upload(
aws_profile=aws_profile,
dir_=work_dir,
gen_sign=contain_signature,
cf_enable=conf.is_aws_cf_enable(),
key=sign_key,
dry_run=dryrun,
manifest_bucket_name=manifest_bucket_name
Expand All @@ -200,6 +201,7 @@ def upload(
aws_profile=aws_profile,
dir_=work_dir,
gen_sign=contain_signature,
cf_enable=conf.is_aws_cf_enable(),
key=sign_key,
dry_run=dryrun,
manifest_bucket_name=manifest_bucket_name
Expand Down
3 changes: 2 additions & 1 deletion charon/cmd/internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ def _get_buckets(targets: List[str], conf: CharonConfig) -> List[Tuple[str, str,
aws_bucket = bucket.get('bucket')
prefix = bucket.get('prefix', '')
registry = bucket.get('registry', DEFAULT_REGISTRY)
buckets.append((target, aws_bucket, prefix, registry))
cf_domain = bucket.get('domain', None)
buckets.append((target, aws_bucket, prefix, registry, cf_domain))
return buckets


Expand Down
4 changes: 4 additions & 0 deletions charon/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def __init__(self, data: Dict):
self.__manifest_bucket: str = data.get("manifest_bucket", None)
self.__ignore_signature_suffix: Dict = data.get("ignore_signature_suffix", None)
self.__signature_command: str = data.get("detach_signature_command", None)
self.__aws_cf_enable: bool = data.get("aws_cf_enable", False)

def get_ignore_patterns(self) -> List[str]:
return self.__ignore_patterns
Expand All @@ -63,6 +64,9 @@ def get_ignore_signature_suffix(self, package_type: str) -> List[str]:
def get_detach_signature_command(self) -> str:
return self.__signature_command

def is_aws_cf_enable(self) -> bool:
return self.__aws_cf_enable


def get_config() -> Optional[CharonConfig]:
config_file_path = os.path.join(os.getenv("HOME"), ".charon", CONFIG_FILE)
Expand Down
21 changes: 14 additions & 7 deletions charon/pkgs/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@
"""
from charon.config import get_template
from charon.storage import S3Client
from charon.cache import CFClient
from charon.pkgs.pkg_utils import invalidate_cf_paths
from charon.constants import (INDEX_HTML_TEMPLATE, NPM_INDEX_HTML_TEMPLATE,
PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM, PROD_INFO_SUFFIX)
from charon.utils.files import digest_content
from jinja2 import Template
import os
import logging
from typing import List, Set
from typing import List, Set, Tuple

from charon.utils.strings import remove_prefix

Expand Down Expand Up @@ -259,21 +261,23 @@ def __compare(self, other) -> int:


def re_index(
bucket: str,
prefix: str,
bucket: Tuple[str, str, str, str, str],
path: str,
package_type: str,
aws_profile: str = None,
cf_enable: bool = False,
dry_run: bool = False
):
"""Refresh the index.html for the specified folder in the bucket.
"""
bucket_name = bucket[1]
prefix = bucket[2]
s3_client = S3Client(aws_profile=aws_profile, dry_run=dry_run)
real_prefix = prefix if prefix.strip() != "/" else ""
s3_folder = os.path.join(real_prefix, path)
if path.strip() == "" or path.strip() == "/":
s3_folder = prefix
items: List[str] = s3_client.list_folder_content(bucket, s3_folder)
items: List[str] = s3_client.list_folder_content(bucket_name, s3_folder)
contents = [i for i in items if not i.endswith(PROD_INFO_SUFFIX)]
if PACKAGE_TYPE_NPM == package_type:
if any([True if "package.json" in c else False for c in contents]):
Expand Down Expand Up @@ -303,14 +307,17 @@ def re_index(
index_path = os.path.join(path, "index.html")
if path == "/":
index_path = "index.html"
s3_client.simple_delete_file(index_path, (bucket, real_prefix))
s3_client.simple_delete_file(index_path, (bucket_name, real_prefix))
s3_client.simple_upload_file(
index_path, index_content, (bucket, real_prefix),
index_path, index_content, (bucket_name, real_prefix),
"text/html", digest_content(index_content)
)
if cf_enable:
cf_client = CFClient(aws_profile=aws_profile)
invalidate_cf_paths(cf_client, bucket, [index_path])
else:
logger.warning(
"The path %s does not contain any contents in bucket %s. "
"Will not do any re-indexing",
path, bucket
path, bucket_name
)
Loading

0 comments on commit 80f9840

Please sign in to comment.