From c152cf4dfe04612dac25eae1d8eceda2d8aaaf59 Mon Sep 17 00:00:00 2001 From: git-hyagi <45576767+git-hyagi@users.noreply.github.com> Date: Fri, 20 Sep 2024 14:04:38 -0300 Subject: [PATCH] Add os/arch/layers_size fields to manifest model closes: #1767 --- CHANGES/1767.feature | 1 + .../commands/container-handle-image-data.py | 45 ++++++++++++-- ...pdate_manifest_and_manifestlistmanifest.py | 49 +++++++++++++++ pulp_container/app/models.py | 39 +++++++++++- pulp_container/app/registry.py | 9 +++ pulp_container/app/registry_api.py | 22 ++++--- pulp_container/app/serializers.py | 18 ++++++ pulp_container/app/tasks/builder.py | 6 +- pulp_container/app/tasks/sync_stages.py | 61 +++++++++++++------ .../tests/functional/api/test_build_images.py | 12 +++- .../functional/api/test_pull_through_cache.py | 8 +++ .../tests/functional/api/test_push_content.py | 10 +++ .../tests/functional/api/test_sync.py | 27 +++++++- 13 files changed, 267 insertions(+), 40 deletions(-) create mode 100644 CHANGES/1767.feature create mode 100644 pulp_container/app/migrations/0042_update_manifest_and_manifestlistmanifest.py diff --git a/CHANGES/1767.feature b/CHANGES/1767.feature new file mode 100644 index 000000000..3ab279012 --- /dev/null +++ b/CHANGES/1767.feature @@ -0,0 +1 @@ +Added `architecture`, `os`, and `compressed_layers_size` fields to Manifest. diff --git a/pulp_container/app/management/commands/container-handle-image-data.py b/pulp_container/app/management/commands/container-handle-image-data.py index a8635f1e7..139b9a9eb 100644 --- a/pulp_container/app/management/commands/container-handle-image-data.py +++ b/pulp_container/app/management/commands/container-handle-image-data.py @@ -1,3 +1,4 @@ +import json from json.decoder import JSONDecodeError from gettext import gettext as _ @@ -39,10 +40,22 @@ class Command(BaseCommand): def handle(self, *args, **options): manifests_updated_count = 0 - manifests_v1 = Manifest.objects.filter(data__isnull=True, media_type=MEDIA_TYPE.MANIFEST_V1) + manifests_v1 = Manifest.objects.filter( + Q(media_type=MEDIA_TYPE.MANIFEST_V1), + Q(data__isnull=True) + | Q(architecture__isnull=True) + | Q(os__isnull=True) + | Q(compressed_layers_size__isnull=True), + ) manifests_updated_count += self.update_manifests(manifests_v1) - manifests_v2 = Manifest.objects.filter(Q(data__isnull=True) | Q(annotations={}, labels={})) + manifests_v2 = Manifest.objects.filter( + Q(data__isnull=True) + | Q(annotations={}, labels={}) + | Q(architecture__isnull=True) + | Q(os__isnull=True) + | Q(compressed_layers_size__isnull=True) + ) manifests_v2 = manifests_v2.exclude( media_type__in=[MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI, MEDIA_TYPE.MANIFEST_V1] ) @@ -68,6 +81,17 @@ def handle(self, *args, **options): def update_manifests(self, manifests_qs): manifests_updated_count = 0 manifests_to_update = [] + fields_to_update = [ + "annotations", + "labels", + "is_bootable", + "is_flatpak", + "data", + "os", + "architecture", + "compressed_layers_size", + ] + for manifest in manifests_qs.iterator(): # suppress non-existing/already migrated artifacts and corrupted JSON files with suppress(ObjectDoesNotExist, JSONDecodeError): @@ -76,7 +100,6 @@ def update_manifests(self, manifests_qs): manifests_to_update.append(manifest) if len(manifests_to_update) > 1000: - fields_to_update = ["annotations", "labels", "is_bootable", "is_flatpak", "data"] manifests_qs.model.objects.bulk_update( manifests_to_update, fields_to_update, @@ -85,7 +108,6 @@ def update_manifests(self, manifests_qs): manifests_to_update.clear() if manifests_to_update: - fields_to_update = ["annotations", "labels", "is_bootable", "is_flatpak", "data"] manifests_qs.model.objects.bulk_update( manifests_to_update, fields_to_update, @@ -103,8 +125,23 @@ def init_manifest(self, manifest): if not (manifest.annotations or manifest.labels): manifest.init_metadata(manifest_data) + if self.needs_os_arch_size_update(manifest): + self.init_manifest_os_arch_size(manifest) manifest._artifacts.clear() + return True + elif self.needs_os_arch_size_update(manifest): + self.init_manifest_os_arch_size(manifest) return True return False + + def needs_os_arch_size_update(self, manifest): + return manifest.media_type not in [MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI] and not ( + manifest.architecture or manifest.os or manifest.compressed_layers_size + ) + + def init_manifest_os_arch_size(self, manifest): + manifest_data = json.loads(manifest.data) + manifest.init_architecture_and_os(manifest_data) + manifest.init_compressed_layers_size(manifest_data) diff --git a/pulp_container/app/migrations/0042_update_manifest_and_manifestlistmanifest.py b/pulp_container/app/migrations/0042_update_manifest_and_manifestlistmanifest.py new file mode 100644 index 000000000..c2cfb2e94 --- /dev/null +++ b/pulp_container/app/migrations/0042_update_manifest_and_manifestlistmanifest.py @@ -0,0 +1,49 @@ +# Generated by Django 4.2.16 on 2024-10-02 12:04 +import warnings + +from django.db import migrations, models + +def print_warning_for_updating_manifest_fields(apps, schema_editor): + warnings.warn( + "Run 'pulpcore-manager container-handle-image-data' to update the manifests' " + "os, architecture, and compressed_layers_size fields." + ) + +class Migration(migrations.Migration): + + dependencies = [ + ('container', '0041_add_pull_through_pull_permissions'), + ] + + operations = [ + migrations.AddField( + model_name='manifest', + name='architecture', + field=models.TextField(null=True), + ), + migrations.AddField( + model_name='manifest', + name='compressed_layers_size', + field=models.IntegerField(null=True), + ), + migrations.AddField( + model_name='manifest', + name='os', + field=models.TextField(null=True), + ), + migrations.AlterField( + model_name='manifestlistmanifest', + name='architecture', + field=models.TextField(blank=True, default=''), + ), + migrations.AlterField( + model_name='manifestlistmanifest', + name='os', + field=models.TextField(blank=True, default=''), + ), + migrations.RunPython( + print_warning_for_updating_manifest_fields, + reverse_code=migrations.RunPython.noop, + elidable=True, + ), + ] diff --git a/pulp_container/app/models.py b/pulp_container/app/models.py index b96701c92..0597f04a9 100644 --- a/pulp_container/app/models.py +++ b/pulp_container/app/models.py @@ -77,6 +77,11 @@ class Manifest(Content): labels (models.JSONField): Metadata stored inside the image configuration. is_bootable (models.BooleanField): Indicates whether the image is bootable or not. is_flatpak (models.BooleanField): Indicates whether the image is a flatpak package or not. + architecture (models.TextField): CPU architecture for which the binaries in the image are + designed to run. + os (models.TextField): Operating System which the image is built to run on. + compressed_layers_size (models.IntegerField): Sum of the sizes, in bytes, of all compressed + layers. Relations: blobs (models.ManyToManyField): Many-to-many relationship with Blob. @@ -103,6 +108,9 @@ class Manifest(Content): annotations = models.JSONField(default=dict) labels = models.JSONField(default=dict) + architecture = models.TextField(null=True) + os = models.TextField(null=True) + compressed_layers_size = models.IntegerField(null=True) is_bootable = models.BooleanField(default=False) is_flatpak = models.BooleanField(default=False) @@ -176,6 +184,32 @@ def init_manifest_nature(self): else: return False + def init_architecture_and_os(self, manifest_data): + # schema1 has the architecture/os definition in the Manifest (not in the ConfigBlob) + if manifest_data.get("architecture", None) or manifest_data.get("os", None): + self.architecture = manifest_data.get("architecture", None) + self.os = manifest_data.get("os", None) + return + + manifest_config = manifest_data.get("config", None) + config_blob_sha256 = manifest_config.get("digest", None) + blob_artifact = Artifact.objects.get(sha256=config_blob_sha256.removeprefix("sha256:")) + config_blob, _ = get_content_data(blob_artifact) + self.architecture = config_blob.get("architecture", None) + self.os = config_blob.get("os", None) + + def init_compressed_layers_size(self, manifest_data): + # manifestv2 schema1 has only blobSum definition for each layer + if manifest_data.get("fsLayers", None): + self.compressed_layers_size = 0 + return + + layers = manifest_data.get("layers") + compressed_size = 0 + for layer in layers: + compressed_size += layer.get("size") + self.compressed_layers_size = compressed_size + def is_bootable_image(self): if ( self.annotations.get("containers.bootc") == "1" @@ -222,8 +256,9 @@ class ManifestListManifest(models.Model): manifest_list (models.ForeignKey): Many-to-one relationship with ManifestList. """ - architecture = models.TextField() - os = models.TextField() + # in oci-index spec, platform is an optional field + architecture = models.TextField(default="", blank=True) + os = models.TextField(default="", blank=True) os_version = models.TextField(default="", blank=True) os_features = models.TextField(default="", blank=True) features = models.TextField(default="", blank=True) diff --git a/pulp_container/app/registry.py b/pulp_container/app/registry.py index f907ef98d..40f7baeb0 100644 --- a/pulp_container/app/registry.py +++ b/pulp_container/app/registry.py @@ -438,10 +438,15 @@ async def run_pipeline(self, raw_text_manifest_data): ) async def init_pending_content(self, digest, manifest_data, media_type, raw_text_data): + + os = manifest_data.get("os", None) + architecture = manifest_data.get("architecture", None) if config := manifest_data.get("config", None): config_digest = config["digest"] config_blob = await self.save_config_blob(config_digest) await sync_to_async(self.repository.pending_blobs.add)(config_blob) + os = config.get("os") + architecture = config.get("architecture") else: config_blob = None @@ -453,12 +458,16 @@ async def init_pending_content(self, digest, manifest_data, media_type, raw_text media_type=media_type, config_blob=config_blob, data=raw_text_data, + os=os, + architecture=architecture, ) # skip if media_type of schema1 if media_type in (MEDIA_TYPE.MANIFEST_V2, MEDIA_TYPE.MANIFEST_OCI): await sync_to_async(manifest.init_metadata)(manifest_data=manifest_data) + await sync_to_async(manifest.init_compressed_layers_size)(manifest_data=manifest_data) + try: await manifest.asave() except IntegrityError: diff --git a/pulp_container/app/registry_api.py b/pulp_container/app/registry_api.py index 83c95e78e..1056be790 100644 --- a/pulp_container/app/registry_api.py +++ b/pulp_container/app/registry_api.py @@ -1203,7 +1203,7 @@ def put(self, request, path, pk=None): if is_manifest_list: manifests = {} for manifest in content_data.get("manifests"): - manifests[manifest["digest"]] = manifest["platform"] + manifests[manifest["digest"]] = manifest.get("platform", None) digests = set(manifests.keys()) @@ -1217,17 +1217,17 @@ def put(self, request, path, pk=None): manifests_to_list = [] for manifest in found_manifests: - platform = manifests[manifest.digest] manifest_to_list = models.ManifestListManifest( manifest_list=manifest, image_manifest=manifest_list, - architecture=platform["architecture"], - os=platform["os"], - features=platform.get("features", ""), - variant=platform.get("variant", ""), - os_version=platform.get("os.version", ""), - os_features=platform.get("os.features", ""), ) + if platform := manifests[manifest.digest]: + manifest_to_list.architecture = (platform["architecture"],) + manifest_to_list.os = (platform["os"],) + manifest_to_list.features = (platform.get("features", ""),) + manifest_to_list.variant = (platform.get("variant", ""),) + manifest_to_list.os_version = (platform.get("os.version", ""),) + manifest_to_list.os_features = (platform.get("os.features", ""),) manifests_to_list.append(manifest_to_list) models.ManifestListManifest.objects.bulk_create( @@ -1295,6 +1295,7 @@ def put(self, request, path, pk=None): config_blob = found_config_blobs.first() manifest = self._init_manifest(manifest_digest, media_type, raw_text_data, config_blob) manifest.init_metadata(manifest_data=content_data) + manifest.init_compressed_layers_size(manifest_data=content_data) manifest = self._save_manifest(manifest) @@ -1351,13 +1352,16 @@ def put(self, request, path, pk=None): return ManifestResponse(manifest, path, request, status=201) def _init_manifest(self, manifest_digest, media_type, raw_text_data, config_blob=None): - return models.Manifest( + manifest = models.Manifest( digest=manifest_digest, schema_version=2, media_type=media_type, config_blob=config_blob, data=raw_text_data, ) + if config_blob: + manifest.init_architecture_and_os(json.loads(raw_text_data)) + return manifest def _save_manifest(self, manifest): try: diff --git a/pulp_container/app/serializers.py b/pulp_container/app/serializers.py index 1287e49fe..eafcdca89 100644 --- a/pulp_container/app/serializers.py +++ b/pulp_container/app/serializers.py @@ -103,6 +103,21 @@ class ManifestSerializer(NoArtifactContentSerializer): default=False, help_text=_("A boolean determining whether the image bundles a Flatpak application"), ) + architecture = serializers.CharField( + help_text="The CPU architecture which the binaries in this image are built to run on.", + required=False, + default=None, + ) + os = serializers.CharField( + help_text="The name of the operating system which the image is built to run on.", + required=False, + default=None, + ) + compressed_layers_size = serializers.IntegerField( + help_text="Specifies the sum of the sizes, in bytes, of all compressed layers", + required=False, + default=None, + ) class Meta: fields = NoArtifactContentSerializer.Meta.fields + ( @@ -116,6 +131,9 @@ class Meta: "labels", "is_bootable", "is_flatpak", + "architecture", + "os", + "compressed_layers_size", ) model = models.Manifest diff --git a/pulp_container/app/tasks/builder.py b/pulp_container/app/tasks/builder.py index 80d87b339..18b418bfa 100644 --- a/pulp_container/app/tasks/builder.py +++ b/pulp_container/app/tasks/builder.py @@ -86,11 +86,15 @@ def add_image_from_directory_to_repository(path, repository, tag): config_blob = get_or_create_blob(manifest_json["config"], manifest, path) manifest.config_blob = config_blob - manifest.save() + manifest.init_architecture_and_os(manifest_json) pks_to_add = [] + compressed_size = 0 for layer in manifest_json["layers"]: + compressed_size += layer.get("size") pks_to_add.append(get_or_create_blob(layer, manifest, path).pk) + manifest.compressed_layers_size = compressed_size + manifest.save() pks_to_add.extend([manifest.pk, tag.pk, config_blob.pk]) new_repo_version.add_content(Content.objects.filter(pk__in=pks_to_add)) diff --git a/pulp_container/app/tasks/sync_stages.py b/pulp_container/app/tasks/sync_stages.py index 15ade2596..1099bc09c 100644 --- a/pulp_container/app/tasks/sync_stages.py +++ b/pulp_container/app/tasks/sync_stages.py @@ -332,12 +332,15 @@ async def handle_blobs(self, manifest_dc, content_data): Handle blobs. """ manifest_dc.extra_data["blob_dcs"] = [] + compressed_size = 0 for layer in content_data.get("layers") or content_data.get("fsLayers"): if not self._include_layer(layer): continue + compressed_size += layer.get("size", 0) blob_dc = self.create_blob(layer) manifest_dc.extra_data["blob_dcs"].append(blob_dc) await self.put(blob_dc) + manifest_dc.content.compressed_layers_size = compressed_size layer = content_data.get("config", None) if layer: blob_dc = self.create_blob(layer, deferred_download=False) @@ -390,8 +393,9 @@ def create_manifest(self, manifest_data, raw_text_data, media_type, digest=None) media_type=media_type, data=raw_text_data, annotations=manifest_data.get("annotations", {}), + architecture=manifest_data.get("architecture", None), + os=manifest_data.get("os", None), ) - manifest_dc = DeclarativeContent(content=manifest) return manifest_dc @@ -431,6 +435,8 @@ async def _download_and_instantiate_manifest(self, manifest_url, digest): media_type=media_type, data=raw_text_data, annotations=content_data.get("annotations", {}), + architecture=content_data.get("architecture", None), + os=content_data.get("os", None), ) return content_data, manifest @@ -470,13 +476,16 @@ async def create_listed_manifest(self, manifest_data): ) platform = {} - p = manifest_data["platform"] - platform["architecture"] = p["architecture"] - platform["os"] = p["os"] - platform["features"] = p.get("features", "") - platform["variant"] = p.get("variant", "") - platform["os.version"] = p.get("os.version", "") - platform["os.features"] = p.get("os.features", "") + # in oci-index spec, platform is an optional field + if p := manifest_data.get("platform", None): + platform["architecture"] = p["architecture"] + platform["os"] = p["os"] + platform["features"] = p.get("features", "") + platform["variant"] = p.get("variant", "") + platform["os.version"] = p.get("os.version", "") + platform["os.features"] = p.get("os.features", "") + manifest.os = p["os"] + manifest.architecture = p["architecture"] man_dc = DeclarativeContent(content=manifest) return {"manifest_dc": man_dc, "platform": platform, "content_data": content_data} @@ -626,19 +635,31 @@ def _post_save(self, batch): manifest_lists.append(dc.content) for listed_manifest in dc.extra_data["listed_manifests"]: manifest_dc = listed_manifest["manifest_dc"] - platform = listed_manifest["platform"] - manifest_list_manifests.append( - ManifestListManifest( - manifest_list=manifest_dc.content, - image_manifest=dc.content, - architecture=platform["architecture"], - os=platform["os"], - features=platform.get("features"), - variant=platform.get("variant"), - os_version=platform.get("os.version"), - os_features=platform.get("os.features"), - ) + manifest_list_manifest = ManifestListManifest( + manifest_list=manifest_dc.content, + image_manifest=dc.content, ) + if platform := listed_manifest.get("platform"): + manifest_list_manifest.architecture = platform["architecture"] + manifest_list_manifest.os = platform["os"] + manifest_list_manifest.features = platform.get("features") + manifest_list_manifest.variant = platform.get("variant") + manifest_list_manifest.os_version = platform.get("os.version") + manifest_list_manifest.os_features = platform.get("os.features") + manifest_list_manifests.append(manifest_list_manifest) + continue + + if "config_blob_dc" in dc.extra_data: + manifest_dc = dc.content + # if the os or architecture is already defined it means we could extract these + # values from manifest_list.platform.{os,architecture} + if manifest_dc.os or manifest_dc.architecture: + continue + + manifest_data = json.loads(manifest_dc.data) + manifest_dc.init_architecture_and_os(manifest_data) + manifest_dc.save() + if blob_manifests: BlobManifest.objects.bulk_create(blob_manifests, ignore_conflicts=True) if manifest_list_manifests: diff --git a/pulp_container/tests/functional/api/test_build_images.py b/pulp_container/tests/functional/api/test_build_images.py index 00574123d..f8b4b87f8 100644 --- a/pulp_container/tests/functional/api/test_build_images.py +++ b/pulp_container/tests/functional/api/test_build_images.py @@ -204,7 +204,12 @@ def test_invalid_containerfile_from_build_context( def test_without_build_context( - build_image, container_distribution_api, container_repo, gen_object_with_cleanup, local_registry + build_image, + container_distribution_api, + container_manifest_api, + container_repo, + gen_object_with_cleanup, + local_registry, ): """Test build with only a Containerfile (no additional files)""" @@ -232,3 +237,8 @@ def containerfile_without_context_files(): local_registry.pull(distribution.base_path) image = local_registry.inspect(distribution.base_path) assert image[0]["Config"]["Cmd"] == ["ls", "/"] + manifest = container_manifest_api.list(digest=image[0]["Digest"]) + manifests = manifest.to_dict()["results"] + assert any("amd" in manifest["architecture"] for manifest in manifests) + assert any("linux" in manifest["os"] for manifest in manifests) + assert any(manifest["compressed_layers_size"] > 0 for manifest in manifests) diff --git a/pulp_container/tests/functional/api/test_pull_through_cache.py b/pulp_container/tests/functional/api/test_pull_through_cache.py index f4f6ac01d..0eb42f7dc 100644 --- a/pulp_container/tests/functional/api/test_pull_through_cache.py +++ b/pulp_container/tests/functional/api/test_pull_through_cache.py @@ -38,6 +38,7 @@ def _add_pull_through_entities_to_cleanup(path): def pull_and_verify( anonymous_user, add_pull_through_entities_to_cleanup, + container_manifest_api, container_pull_through_distribution_api, container_distribution_api, container_repository_api, @@ -59,6 +60,13 @@ def _pull_and_verify(images, pull_through_distribution): local_registry.pull(local_image_path) local_image = local_registry.inspect(local_image_path) + # 1.1. check pulp manifest model fields + manifest = container_manifest_api.list(digest=local_image[0]["Digest"]) + manifests = manifest.to_dict()["results"] + assert any("amd" in manifest["architecture"] for manifest in manifests) + assert any("linux" in manifest["os"] for manifest in manifests) + assert any(manifest["compressed_layers_size"] > 0 for manifest in manifests) + path, tag = local_image_path.split(":") tags_to_verify.append(tag) diff --git a/pulp_container/tests/functional/api/test_push_content.py b/pulp_container/tests/functional/api/test_push_content.py index baf659f62..104f02239 100644 --- a/pulp_container/tests/functional/api/test_push_content.py +++ b/pulp_container/tests/functional/api/test_push_content.py @@ -39,6 +39,7 @@ def test_push_using_registry_client_admin( add_to_cleanup, registry_client, local_registry, + container_manifest_api, container_namespace_api, ): """Test push with official registry client and logged in as admin.""" @@ -48,6 +49,15 @@ def test_push_using_registry_client_admin( registry_client.pull(image_path) local_registry.tag_and_push(image_path, local_url) local_registry.pull(local_url) + + # check pulp manifest model fields + local_image = local_registry.inspect(local_url) + manifest = container_manifest_api.list(digest=local_image[0]["Digest"]) + manifests = manifest.to_dict()["results"] + assert any("amd" in manifest["architecture"] for manifest in manifests) + assert any("linux" in manifest["os"] for manifest in manifests) + assert any(manifest["compressed_layers_size"] > 0 for manifest in manifests) + # ensure that same content can be pushed twice without permission errors local_registry.tag_and_push(image_path, local_url) diff --git a/pulp_container/tests/functional/api/test_sync.py b/pulp_container/tests/functional/api/test_sync.py index bfb088fdd..84ecd4ca1 100644 --- a/pulp_container/tests/functional/api/test_sync.py +++ b/pulp_container/tests/functional/api/test_sync.py @@ -3,7 +3,12 @@ import pytest from pulpcore.tests.functional.utils import PulpTaskError -from pulp_container.tests.functional.constants import PULP_FIXTURE_1, PULP_LABELED_FIXTURE +from pulp_container.constants import MEDIA_TYPE +from pulp_container.tests.functional.constants import ( + PULP_FIXTURE_1, + PULP_LABELED_FIXTURE, + PULP_HELLO_WORLD_LINUX_AMD64_DIGEST, +) from pulp_container.tests.functional.constants import ( REGISTRY_V2_FEED_URL, @@ -39,8 +44,14 @@ def _synced_container_repository_factory( @pytest.mark.parallel -def test_basic_sync(container_repo, container_remote, container_repository_api, container_sync): - container_sync(container_repo, container_remote) +def test_basic_sync( + container_repo, + container_remote, + container_repository_api, + container_sync, + container_manifest_api, +): + repo_version = container_sync(container_repo, container_remote).created_resources[0] repository = container_repository_api.read(container_repo.pulp_href) assert "versions/1/" in repository.latest_version_href @@ -53,6 +64,16 @@ def test_basic_sync(container_repo, container_remote, container_repository_api, assert repository.latest_version_href == latest_version_href + manifest = container_manifest_api.list( + repository_version=repo_version, + media_type=[MEDIA_TYPE.MANIFEST_V2], + digest=PULP_HELLO_WORLD_LINUX_AMD64_DIGEST, + ) + manifests = manifest.to_dict()["results"] + assert any("amd" in manifest["architecture"] for manifest in manifests) + assert any("linux" in manifest["os"] for manifest in manifests) + assert any(manifest["compressed_layers_size"] > 0 for manifest in manifests) + @pytest.mark.parallel def test_sync_labelled_image(