Skip to content

Commit

Permalink
Use wildcard for paths in maven CF invalidating
Browse files Browse the repository at this point in the history
  * And ignore the indexing(index.html) CF invalidating per cost
  consideration
  * And change the root work dir for npm uploading
  • Loading branch information
ligangty committed Mar 28, 2024
1 parent a92f24e commit a52ba7a
Show file tree
Hide file tree
Showing 9 changed files with 118 additions and 76 deletions.
61 changes: 38 additions & 23 deletions charon/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,38 +59,53 @@ def __get_endpoint(self, extra_conf) -> str:
logger.debug("[CloudFront] No user-specified endpoint url is used.")
return endpoint_url

def invalidate_paths(self, distr_id: str, paths: List[str]) -> Dict[str, str]:
def invalidate_paths(
self, distr_id: str, paths: List[str],
batch_size: int = 15
) -> List[Dict[str, str]]:
"""Send a invalidating requests for the paths in distribution to CloudFront.
This will invalidate the paths in the distribution to enforce the refreshment
from backend S3 bucket for these paths. For details see:
https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Invalidation.html
* The distr_id is the id for the distribution. This id can be get through
get_dist_id_by_domain(domain) function
* Can specify the invalidating paths through paths param.
* Batch size is the number of paths to be invalidated in one request.
Because paths contains wildcard(*), so the default value is 15 which
is the maximum number in official doc:
https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Invalidation.html#InvalidationLimits
"""
caller_ref = str(uuid.uuid4())
logger.debug("[CloudFront] Creating invalidation for paths: %s", paths)
try:
response = self.__client.create_invalidation(
DistributionId=distr_id,
InvalidationBatch={
'CallerReference': caller_ref,
'Paths': {
'Quantity': len(paths),
'Items': paths
real_paths = [paths]
# Split paths into batches by batch_size
if batch_size:
real_paths = [paths[i:i + batch_size] for i in range(0, len(paths), batch_size)]
results = []
for batch_paths in real_paths:
caller_ref = str(uuid.uuid4())
try:
response = self.__client.create_invalidation(
DistributionId=distr_id,
InvalidationBatch={
'CallerReference': caller_ref,
'Paths': {
'Quantity': len(batch_paths),
'Items': batch_paths
}
}
}
)
if response:
invalidation = response.get('Invalidation', {})
return {
'Id': invalidation.get('Id', None),
'Status': invalidation.get('Status', None)
}
except Exception as err:
logger.error(
"[CloudFront] Error occurred while creating invalidation, error: %s", err
)
)
if response:
invalidation = response.get('Invalidation', {})
results.append({
'Id': invalidation.get('Id', None),
'Status': invalidation.get('Status', None)
})
except Exception as err:
logger.error(
"[CloudFront] Error occurred while creating invalidation"
" for paths %s, error: %s", batch_paths, err
)
return results

def check_invalidation(self, distr_id: str, invalidation_id: str) -> dict:
try:
Expand All @@ -115,7 +130,7 @@ def get_dist_id_by_domain(self, domain: str) -> str:
"""Get distribution id by a domain name. The id can be used to send invalidating
request through #invalidate_paths function
* Domain are Ronda domains, like "maven.repository.redhat.com"
or "npm.repository.redhat.com"
or "npm.registry.redhat.com"
"""
try:
response = self.__client.list_distributions()
Expand Down
13 changes: 7 additions & 6 deletions charon/pkgs/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
"""
from charon.config import get_template
from charon.storage import S3Client
from charon.cache import CFClient
from charon.pkgs.pkg_utils import invalidate_cf_paths
# from charon.cache import CFClient
# from charon.pkgs.pkg_utils import invalidate_cf_paths
from charon.constants import (INDEX_HTML_TEMPLATE, NPM_INDEX_HTML_TEMPLATE,
PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM, PROD_INFO_SUFFIX)
from charon.utils.files import digest_content
Expand Down Expand Up @@ -265,7 +265,7 @@ def re_index(
path: str,
package_type: str,
aws_profile: str = None,
cf_enable: bool = False,
# cf_enable: bool = False,
dry_run: bool = False
):
"""Refresh the index.html for the specified folder in the bucket.
Expand Down Expand Up @@ -312,9 +312,10 @@ def re_index(
index_path, index_content, (bucket_name, real_prefix),
"text/html", digest_content(index_content)
)
if cf_enable:
cf_client = CFClient(aws_profile=aws_profile)
invalidate_cf_paths(cf_client, bucket, [index_path])
# We will not invalidate index.html per cost consideration
# if cf_enable:
# cf_client = CFClient(aws_profile=aws_profile)
# invalidate_cf_paths(cf_client, bucket, [index_path])
else:
logger.warning(
"The path %s does not contain any contents in bucket %s. "
Expand Down
47 changes: 34 additions & 13 deletions charon/pkgs/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ def __get_mvn_template(kind: str, default: str) -> str:

META_TEMPLATE = __get_mvn_template("maven-metadata.xml.j2", MAVEN_METADATA_TEMPLATE)
ARCH_TEMPLATE = __get_mvn_template("archetype-catalog.xml.j2", ARCHETYPE_CATALOG_TEMPLATE)
STANDARD_GENERATED_IGNORES = ["maven-metadata.xml", "archetype-catalog.xml"]
MAVEN_METADATA_FILE = "maven-metadata.xml"
MAVEN_ARCH_FILE = "archetype-catalog.xml"
STANDARD_GENERATED_IGNORES = [MAVEN_METADATA_FILE, MAVEN_ARCH_FILE]


class MavenMetadata(object):
Expand Down Expand Up @@ -219,7 +221,7 @@ def gen_meta_file(group_id, artifact_id: str, versions: list, root="/", digest=T
).generate_meta_file_content()
g_path = "/".join(group_id.split("."))
meta_files = []
final_meta_path = os.path.join(root, g_path, artifact_id, "maven-metadata.xml")
final_meta_path = os.path.join(root, g_path, artifact_id, MAVEN_METADATA_FILE)
try:
overwrite_file(final_meta_path, content)
meta_files.append(final_meta_path)
Expand Down Expand Up @@ -374,7 +376,7 @@ def handle_maven_uploading(
cf_invalidate_paths.extend(meta_files.get(META_FILE_GEN_KEY, []))

# 8. Determine refreshment of archetype-catalog.xml
if os.path.exists(os.path.join(top_level, "archetype-catalog.xml")):
if os.path.exists(os.path.join(top_level, MAVEN_ARCH_FILE)):
logger.info("Start generating archetype-catalog.xml for bucket %s", bucket_name)
upload_archetype_file = _generate_upload_archetype_catalog(
s3=s3_client, bucket=bucket_name,
Expand Down Expand Up @@ -451,15 +453,16 @@ def handle_maven_uploading(
)
failed_metas.extend(_failed_metas)
logger.info("Index files updating done\n")
# Add index files to Cf invalidate paths
if cf_enable:
cf_invalidate_paths.extend(created_indexes)
# We will not invalidate the index files per cost consideration
# if cf_enable:
# cf_invalidate_paths.extend(created_indexes)
else:
logger.info("Bypass indexing")

# Finally do the CF invalidating for metadata files
if cf_enable and len(cf_invalidate_paths) > 0:
cf_client = CFClient(aws_profile=aws_profile)
cf_invalidate_paths = __wildcard_metadata_paths(cf_invalidate_paths)
invalidate_cf_paths(cf_client, bucket, cf_invalidate_paths, top_level)

upload_post_process(failed_files, failed_metas, prod_key, bucket_name)
Expand Down Expand Up @@ -578,7 +581,7 @@ def handle_maven_del(
cf_invalidate_paths.extend(all_meta_files)

# 7. Determine refreshment of archetype-catalog.xml
if os.path.exists(os.path.join(top_level, "archetype-catalog.xml")):
if os.path.exists(os.path.join(top_level, MAVEN_ARCH_FILE)):
logger.info("Start generating archetype-catalog.xml")
archetype_action = _generate_rollback_archetype_catalog(
s3=s3_client, bucket=bucket_name,
Expand Down Expand Up @@ -630,13 +633,15 @@ def handle_maven_del(
if len(_failed_index_files) > 0:
failed_metas.extend(_failed_index_files)
logger.info("Index files updating done.\n")
if cf_enable:
cf_invalidate_paths.extend(created_indexes)
# We will not invalidate the index files per cost consideration
# if cf_enable:
# cf_invalidate_paths.extend(created_indexes)
else:
logger.info("Bypassing indexing")

if cf_enable and len(cf_invalidate_paths):
cf_client = CFClient(aws_profile=aws_profile)
cf_invalidate_paths = __wildcard_metadata_paths(cf_invalidate_paths)
invalidate_cf_paths(cf_client, bucket, cf_invalidate_paths, top_level)

rollback_post_process(failed_files, failed_metas, prod_key, bucket_name)
Expand Down Expand Up @@ -1017,15 +1022,15 @@ def _generate_metadatas(
"No poms found in s3 bucket %s for GA path %s", bucket, path
)
meta_files_deletion = meta_files.get(META_FILE_DEL_KEY, [])
meta_files_deletion.append(os.path.join(path, "maven-metadata.xml"))
meta_files_deletion.extend(__hash_decorate_metadata(path, "maven-metadata.xml"))
meta_files_deletion.append(os.path.join(path, MAVEN_METADATA_FILE))
meta_files_deletion.extend(__hash_decorate_metadata(path, MAVEN_METADATA_FILE))
meta_files[META_FILE_DEL_KEY] = meta_files_deletion
else:
logger.warning("An error happened when scanning remote "
"artifacts under GA path %s", path)
meta_failed_path = meta_files.get(META_FILE_FAILED, [])
meta_failed_path.append(os.path.join(path, "maven-metadata.xml"))
meta_failed_path.extend(__hash_decorate_metadata(path, "maven-metadata.xml"))
meta_failed_path.append(os.path.join(path, MAVEN_METADATA_FILE))
meta_failed_path.extend(__hash_decorate_metadata(path, MAVEN_METADATA_FILE))
meta_files[META_FILE_FAILED] = meta_failed_path
else:
logger.debug(
Expand Down Expand Up @@ -1090,6 +1095,22 @@ def __get_suffix(package_type: str, conf: CharonConfig) -> List[str]:
return []


def __wildcard_metadata_paths(paths: List[str]) -> List[str]:
new_paths = []
for path in paths:
if path.endswith(MAVEN_METADATA_FILE)\
or path.endswith(MAVEN_ARCH_FILE):
new_paths.append(path[:-len(".xml")] + ".*")
elif path.endswith(".md5")\
or path.endswith(".sha1")\
or path.endswith(".sha128")\
or path.endswith(".sha256"):
continue
else:
new_paths.append(path)
return new_paths


class VersionCompareKey:
'Used as key function for version sorting'
def __init__(self, obj):
Expand Down
20 changes: 11 additions & 9 deletions charon/pkgs/npm.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ def handle_npm_uploading(

client = S3Client(aws_profile=aws_profile, dry_run=dry_run)
generated_signs = []
succeeded = True
root_dir = mkdtemp(prefix=f"npm-charon-{product}-", dir=dir_)
for bucket in buckets:
# prepare cf invalidate files
cf_invalidate_paths = []
Expand All @@ -113,7 +115,7 @@ def handle_npm_uploading(
prefix = remove_prefix(bucket[2], "/")
registry = bucket[3]
target_dir, valid_paths, package_metadata = _scan_metadata_paths_from_archive(
tarball_path, registry, prod=product, dir__=dir_
tarball_path, registry, prod=product, dir__=root_dir
)
if not os.path.isdir(target_dir):
logger.error("Error: the extracted target_dir path %s does not exist.", target_dir)
Expand All @@ -129,8 +131,6 @@ def handle_npm_uploading(
)
logger.info("Files uploading done\n")

succeeded = True

if not manifest_bucket_name:
logger.warning(
'Warning: No manifest bucket is provided, will ignore the process of manifest '
Expand Down Expand Up @@ -235,8 +235,9 @@ def handle_npm_uploading(
)
failed_metas.extend(_failed_metas)
logger.info("Index files updating done\n")
if cf_enable:
cf_invalidate_paths.extend(created_indexes)
# We will not invalidate the index files per cost consideration
# if cf_enable:
# cf_invalidate_paths.extend(created_indexes)
else:
logger.info("Bypass indexing\n")

Expand All @@ -248,7 +249,7 @@ def handle_npm_uploading(
upload_post_process(failed_files, failed_metas, product, bucket_name)
succeeded = succeeded and len(failed_files) == 0 and len(failed_metas) == 0

return (target_dir, succeeded)
return (root_dir, succeeded)


def handle_npm_del(
Expand Down Expand Up @@ -360,9 +361,10 @@ def handle_npm_del(
)
failed_metas.extend(_failed_index_files)
logger.info("Index files updating done.\n")
if cf_enable and len(created_indexes):
logger.debug("Add index files to cf invalidate list: %s", created_indexes)
cf_invalidate_paths.extend(created_indexes)
# We will not invalidate the index files per cost consideration
# if cf_enable and len(created_indexes):
# logger.debug("Add index files to cf invalidate list: %s", created_indexes)
# cf_invalidate_paths.extend(created_indexes)
else:
logger.info("Bypassing indexing\n")

Expand Down
22 changes: 14 additions & 8 deletions charon/pkgs/pkg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,17 @@ def invalidate_cf_paths(
logger.debug("Invalidating paths: %s", final_paths)
if not domain:
domain = cf_client.get_domain_by_bucket(bucket_name)
distr_id = cf_client.get_dist_id_by_domain(domain)
if distr_id:
result = cf_client.invalidate_paths(distr_id, final_paths)
if result:
logger.info(
"The CF invalidating request for metadata/indexing is sent, "
"request id %s, status is %s", result['Id'], result['Status']
)
if domain:
distr_id = cf_client.get_dist_id_by_domain(domain)
if distr_id:
result = cf_client.invalidate_paths(distr_id, final_paths)
if result:
logger.info(
"The CF invalidating request for metadata/indexing is sent, "
"request status as below:\n %s", result
)
else:
logger.error(
"CF invalidating will not be performed because domain not found for"
" bucket %s. ", bucket_name
)
9 changes: 5 additions & 4 deletions tests/test_cf_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,12 @@
from tests.constants import INPUTS
from moto import mock_aws
import os
import pytest


@mock_aws
class CFReIndexTest(CFBasedTest):
@pytest.mark.skip(reason="Indexing CF invalidation is abandoned")
def test_cf_maven_after_reindex(self):
response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id)
self.assertIsNotNone(response)
Expand All @@ -41,8 +43,7 @@ def test_cf_maven_after_reindex(self):

re_index(
(TEST_BUCKET, TEST_BUCKET, "ga", "", "maven.repository.redhat.com"),
"org/apache/httpcomponents/httpclient/", "maven",
cf_enable=True
"org/apache/httpcomponents/httpclient/", "maven"
)

response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id)
Expand All @@ -51,6 +52,7 @@ def test_cf_maven_after_reindex(self):
self.assertEqual(1, len(items))
self.assertEqual('completed', str.lower(items[0].get('Status')))

@pytest.mark.skip(reason="Indexing CF invalidation is abandoned")
def test_cf_npm_after_reindex(self):
response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id)
self.assertIsNotNone(response)
Expand All @@ -66,8 +68,7 @@ def test_cf_npm_after_reindex(self):

re_index(
(TEST_BUCKET, TEST_BUCKET, "", "", "npm.registry.redhat.com"),
"@babel/", "npm",
cf_enable=True
"@babel/", "npm"
)

response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id)
Expand Down
10 changes: 5 additions & 5 deletions tests/test_cfclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ def test_get_distribution_id(self):
def test_invalidate_paths(self):
dist_id = self.cf_client.get_dist_id_by_domain("maven.repository.redhat.com")
result = self.cf_client.invalidate_paths(dist_id, ["/*"])
self.assertIsNotNone(result['Id'])
self.assertEqual('completed', str.lower(result['Status']))
self.assertTrue(result[0]['Id'])
self.assertEqual('completed', str.lower(result[0]['Status']))
status = self.cf_client.invalidate_paths("noexists_id", ["/*"])
self.assertIsNone(status)
self.assertFalse(status)

@pytest.mark.skip(reason="""
Because current moto 5.0.3 has not implemented the get_invalidation(),
Expand All @@ -60,6 +60,6 @@ def test_invalidate_paths(self):
def test_check_invalidation(self):
dist_id = self.cf_client.get_dist_id_by_domain("maven.repository.redhat.com")
result = self.cf_client.invalidate_paths(dist_id, ["/*"])
invalidation = self.cf_client.check_invalidation(dist_id, result['Id'])
invalidation = self.cf_client.check_invalidation(dist_id, result[0]['Id'])
self.assertIsNotNone(invalidation['Id'])
self.assertEqual('completed', str.lower(result['Status']))
self.assertEqual('completed', str.lower(result[0]['Status']))
3 changes: 1 addition & 2 deletions tests/test_maven_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,7 @@ def test_re_index(self):
)
re_index(
(TEST_BUCKET, TEST_BUCKET, "", "", None),
commons_client_root, "maven",
cf_enable=True
commons_client_root, "maven"
)
indedx_obj = test_bucket.Object(COMMONS_CLIENT_INDEX)
index_content = str(indedx_obj.get()["Body"].read(), "utf-8")
Expand Down
Loading

0 comments on commit a52ba7a

Please sign in to comment.