From d0a35d54134b1d29d198c4b2e170bb407cdf334d Mon Sep 17 00:00:00 2001 From: Elod Illes Date: Thu, 19 Sep 2024 11:42:21 +0200 Subject: [PATCH 1/9] [CI] Remove openstack-tox-functional-py36-fips The job definition does not exist anymore and zuul config reports error because of this, so let's get rid of this from the check queue (gate queue does not even have it). Change-Id: I0b211b7c8fcfebeaf9f1bc9bbee0c6527faa53d9 --- .zuul.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.zuul.yaml b/.zuul.yaml index 3827f02a41..484cd36741 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -306,7 +306,6 @@ - release-notes-jobs-python3 check: jobs: - - openstack-tox-functional-py36-fips - openstack-tox-functional-py39 - glance-tox-functional-py39-rbac-defaults - glance-ceph-thin-provisioning: From 7815f4ed695e0c4f00988aaa06f8a4996167f378 Mon Sep 17 00:00:00 2001 From: elajkat Date: Wed, 18 Sep 2024 12:54:44 +0200 Subject: [PATCH 2/9] [unmaintained-only] Cap setuptools via virtualenv<20.26.4 py39 jobs (on ubuntu-focal) started to fail due to recent virtualenv release (20.26.4 that bundles setuptools 74.1.2) on Yoga, because we have 'packaging==21.3' in this branch that is not compatible with the new setuptools [1]. setuptools is bundled in virtualenv, so it has to be capped via the virtualenv package. tox also needed to be capped (<4) as gate uses tox 3.28.0, but with capping virtualenv we pull in latest tox as well, which would cause other errors. [1] https://github.com/pypa/setuptools/issues/4483 Change-Id: I668557933c51123ce84275a0c718b6e79b3df174 --- tox.ini | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tox.ini b/tox.ini index 826aee78b5..261d6773ff 100644 --- a/tox.ini +++ b/tox.ini @@ -6,6 +6,12 @@ skip_missing_interpreters = true # this allows tox to infer the base python from the environment name # and override any basepython configured in this file ignore_basepython_conflict=true +# Cap setuptools via virtualenv to prevent compatibility issue with yoga +# branch's upper constraint of 'packaging' package (21.3). +requires = + virtualenv<20.26.4 + tox<4 + setuptools<71.0.0 [testenv] # Set default python version From 95fae81787624b5dfe61f6bccc3183ebe50ee296 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Wed, 26 Jun 2024 08:41:02 -0700 Subject: [PATCH 3/9] Add safety check and detection support to FI tool This adds a safety check and detection mechanism to the tools/test_format_inspector.py utility for verifying those features outside of glance. Change-Id: I447e7e51315472f8fa6013d4c4852f54c1e0c43d (cherry picked from commit b27040b87e43ff4acfb6870ef0d13d54e5ca5caa) (cherry picked from commit b394ef00c3426771092d099cf1d96077bfa4b919) (cherry picked from commit aa12d39b453068d9ee8367591eb60d4a15cddece) (cherry picked from commit a1c94b47d02a7bd3ac076ea02a23e456da2d5a62) (cherry picked from commit a5ec69ba1c0a92cc9b91b52b4101b81a330ea684) --- tools/test_format_inspector.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/test_format_inspector.py b/tools/test_format_inspector.py index aa554386ef..63e23210cc 100755 --- a/tools/test_format_inspector.py +++ b/tools/test_format_inspector.py @@ -102,6 +102,13 @@ def main(): else: print('Confirmed size with qemu-img') + print('Image safety check: %s' % ( + fmt.safety_check() and 'passed' or 'FAILED')) + if args.input: + detected_fmt = format_inspector.detect_file_format(args.input) + print('Detected inspector for image as: %s' % ( + detected_fmt.__class__.__name__)) + if __name__ == '__main__': sys.exit(main()) From 8fb607e002926360fc87d1eec4d5078b7699a261 Mon Sep 17 00:00:00 2001 From: Brian Rosmaita Date: Thu, 9 May 2024 09:30:57 -0400 Subject: [PATCH 4/9] Adjust integrated-storage-import jobs We have to address two issues simultaneously: 1. handle multiple cirros images The change[1] configures multiple images for the devstack-tempest job and all the jobs deriving from it. This causes problem with the current post-check-metadata-injection.yaml playbook since we only expect one cirros image and perform steps accordingly. This change modifies the playbook to handle multiple cirros images. Failure in the glance-multistore-cinder-import job can be seen here[2]. [1] https://review.opendev.org/c/openstack/tempest/+/831018 [2] https://zuul.opendev.org/t/openstack/build/68a4d3ec6ce04c87b21d73a333f5b5cd/log/job-output.txt#23161 This was change I70b4a970d7e1. 2. change the the job config to address resource constraints * partial backport of I073216d1bbdd to change swap size (we don't include the change from that patch that was needed to address changes in Bobcat-era devstack) * backporting Ieb96eb6ceb6f to change tempest concurrency Remove openstack-tox-functional-py36-fips as there is no job definition for it. Change-Id: I6cd5b8bbf39f0cfae466f0800faea5a74e960e69 (cherry picked from commit 065e2d7e9e0c605cd4e60c9b150337b6ac6a9fde) (cherry picked from commit 28b05a7cc190a211f7b67ea6a3a9217dc1609ff8) --- .zuul.yaml | 2 ++ playbooks/post-check-metadata-injection.yaml | 16 ++++++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.zuul.yaml b/.zuul.yaml index 484cd36741..3a94e8f5f8 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -213,6 +213,8 @@ The regular tempest-integrated-storage job but with glance metadata injection post-run: playbooks/post-check-metadata-injection.yaml vars: + configure_swap_size: 8192 + tempest_concurrency: 3 zuul_copy_output: /etc/glance-remote: logs devstack_localrc: diff --git a/playbooks/post-check-metadata-injection.yaml b/playbooks/post-check-metadata-injection.yaml index 178866ba1b..2c273693a5 100644 --- a/playbooks/post-check-metadata-injection.yaml +++ b/playbooks/post-check-metadata-injection.yaml @@ -9,12 +9,16 @@ set -xe cirrosimg=$(glance image-list | grep cirros | cut -d" " -f 2) - echo "Dumping the cirros image for debugging..." - glance image-show $cirrosimg + # There could be more than one cirros image so traverse through the list + for image in $cirrosimg + do + echo "Dumping the cirros image for debugging..." + glance image-show $image - echo "Checking that the cirros image was decorated with metdata on import..." - glance image-list --property-filter 'glance_devstack_test=doyouseeme?' | grep cirros + echo "Checking that the cirros image was decorated with metdata on import..." + glance image-list --property-filter 'glance_devstack_test=doyouseeme?' | grep $image - echo "Checking that the cirros image was converted to raw on import..." - glance image-show $cirrosimg | egrep -e 'disk_format.*raw' + echo "Checking that the cirros image was converted to raw on import..." + glance image-show $image | egrep -e 'disk_format.*raw' + done environment: '{{ zuul | zuul_legacy_vars }}' From 7e06597fae3fc245ab04693883684c8c3a97fe2e Mon Sep 17 00:00:00 2001 From: Guillaume Espanel Date: Wed, 25 Jan 2023 11:53:09 +0100 Subject: [PATCH 5/9] Limit CaptureRegion sizes in format_inspector for VMDK and VHDX VMDK: When parsing a VMDK file to calculate its size, the format_inspector determines the location of the Descriptor section by reading two uint64 from the headers of the file and uses them to create the descriptor CaptureRegion. It would be possible to craft a VMDK file that commands the format_inspector to create a very big CaptureRegion, thus exhausting resources on the glance-api process. This patch binds the beginning of the descriptor to 0x200 and limits the size of the CaptureRegion to 1MB, similar to how the VMDK descriptor is parsed by qemu. VHDX: It is a bit more involved, but similar: when looking for the VIRTUAL_DISK_SIZE metadata, the format_inspector was creating an unbounded CaptureRegion. In the same way as it seems to be done in Qemu, we now limit the upper bound of this CaptureRegion. Closes-Bug: #2006490 Change-Id: I3ec5a33df20e1cfb6673f4ff1c7c91aacd065532 (cherry picked from commit d4d33ee30f303f783c0640cd72acb31b313e1164) (cherry picked from commit 06a18202ab52c64803f044b8f848ed1c160905d2) --- glance/common/format_inspector.py | 22 +++- .../unit/common/test_format_inspector.py | 120 ++++++++++++++++++ 2 files changed, 139 insertions(+), 3 deletions(-) diff --git a/glance/common/format_inspector.py b/glance/common/format_inspector.py index 351c300ddb..550cceadbb 100755 --- a/glance/common/format_inspector.py +++ b/glance/common/format_inspector.py @@ -345,6 +345,7 @@ class VHDXInspector(FileInspector): """ METAREGION = '8B7CA206-4790-4B9A-B8FE-575F050F886E' VIRTUAL_DISK_SIZE = '2FA54224-CD1B-4876-B211-5DBED83BF4B8' + VHDX_METADATA_TABLE_MAX_SIZE = 32 * 2048 # From qemu def __init__(self, *a, **k): super(VHDXInspector, self).__init__(*a, **k) @@ -459,6 +460,8 @@ def _find_meta_entry(self, desired_guid): item_offset, item_length, _reserved = struct.unpack( ' Date: Fri, 2 Feb 2024 11:48:24 +0100 Subject: [PATCH 6/9] Support Stream Optimized VMDKs Stream optimized VMDKs are also monolithic disks images, and start with the same sparse extend header as normal monolithic sparse files, so we can parse the virtual disk size in the same manner. See "VMware Virtual Disks Virtual Disk Format 1.1" p. 17. > Header and Footer > The header and the footer are both described by the same SparseExtentHeader > structure shown in Hosted Sparse Extent Header on page 8. Closes-Bug: #2052291 Change-Id: I7d63951ff080dc699b8d11babc0a5998d90621e4 Co-Authored-By: Rajiv Mucheli (cherry picked from commit 5e7e6bfb80ed6ede57f348e688c96dbdfe0e1c2b) (cherry picked from commit 9d45e8d4b87e992be23974a831811bff563ce721) (cherry picked from commit ef22a7425a5fe4f7006f9434a33dd092e2594830) (cherry picked from commit bcfc1e4dee268dc182d73f54729a840194c90d49) --- glance/common/format_inspector.py | 4 +- .../unit/common/test_format_inspector.py | 71 +++++++++++++------ 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/glance/common/format_inspector.py b/glance/common/format_inspector.py index 550cceadbb..d9576f1f8d 100755 --- a/glance/common/format_inspector.py +++ b/glance/common/format_inspector.py @@ -512,7 +512,7 @@ def __str__(self): # # https://www.vmware.com/app/vmdk/?src=vmdk class VMDKInspector(FileInspector): - """vmware VMDK format (monolithicSparse variant only) + """vmware VMDK format (monolithicSparse and streamOptimized variants only) This needs to store the 512 byte header and the descriptor region which should be just after that. The descriptor region is some @@ -582,7 +582,7 @@ def virtual_size(self): vmdktype = descriptor[type_idx:type_end] else: vmdktype = b'formatnotfound' - if vmdktype != b'monolithicSparse': + if vmdktype not in (b'monolithicSparse', b'streamOptimized'): LOG.warning('Unsupported VMDK format %s', vmdktype) return 0 diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py index db6a9830bd..38f8caeb41 100644 --- a/glance/tests/unit/common/test_format_inspector.py +++ b/glance/tests/unit/common/test_format_inspector.py @@ -51,38 +51,51 @@ def tearDown(self): except Exception: pass - def _create_img(self, fmt, size): + def _create_img(self, fmt, size, subformat=None): if fmt == 'vhd': # QEMU calls the vhd format vpc fmt = 'vpc' - fn = tempfile.mktemp(prefix='glance-unittest-formatinspector-', + opt = '' + prefix = 'glance-unittest-formatinspector-' + + if subformat: + opt = ' -o subformat=%s' % subformat + prefix += subformat + '-' + + fn = tempfile.mktemp(prefix=prefix, suffix='.%s' % fmt) self._created_files.append(fn) subprocess.check_output( - 'qemu-img create -f %s %s %i' % (fmt, fn, size), + 'qemu-img create -f %s %s %s %i' % (fmt, opt, fn, size), shell=True) return fn - def _create_allocated_vmdk(self, size_mb): + def _create_allocated_vmdk(self, size_mb, subformat=None): # We need a "big" VMDK file to exercise some parts of the code of the # format_inspector. A way to create one is to first create an empty # file, and then to convert it with the -S 0 option. - fn = tempfile.mktemp(prefix='glance-unittest-formatinspector-', - suffix='.vmdk') + + if subformat is None: + # Matches qemu-img default, see `qemu-img convert -O vmdk -o help` + subformat = 'monolithicSparse' + + prefix = 'glance-unittest-formatinspector-%s-' % subformat + fn = tempfile.mktemp(prefix=prefix, suffix='.vmdk') self._created_files.append(fn) - zeroes = tempfile.mktemp(prefix='glance-unittest-formatinspector-', - suffix='.zero') - self._created_files.append(zeroes) + raw = tempfile.mktemp(prefix=prefix, suffix='.raw') + self._created_files.append(raw) - # Create an empty file + # Create a file with pseudo-random data, otherwise it will get + # compressed in the streamOptimized format subprocess.check_output( - 'dd if=/dev/zero of=%s bs=1M count=%i' % (zeroes, size_mb), + 'dd if=/dev/urandom of=%s bs=1M count=%i' % (raw, size_mb), shell=True) # Convert it to VMDK subprocess.check_output( - 'qemu-img convert -f raw -O vmdk -S 0 %s %s' % (zeroes, fn), + 'qemu-img convert -f raw -O vmdk -o subformat=%s -S 0 %s %s' % ( + subformat, raw, fn), shell=True) return fn @@ -101,8 +114,9 @@ def _test_format_at_block_size(self, format_name, img, block_size): wrapper.close() return fmt - def _test_format_at_image_size(self, format_name, image_size): - img = self._create_img(format_name, image_size) + def _test_format_at_image_size(self, format_name, image_size, + subformat=None): + img = self._create_img(format_name, image_size, subformat=subformat) # Some formats have internal alignment restrictions making this not # always exactly like image_size, so get the real value for comparison @@ -124,11 +138,12 @@ def _test_format_at_image_size(self, format_name, image_size): 'Format used more than 512KiB of memory: %s' % ( fmt.context_info)) - def _test_format(self, format_name): + def _test_format(self, format_name, subformat=None): # Try a few different image sizes, including some odd and very small # sizes for image_size in (512, 513, 2057, 7): - self._test_format_at_image_size(format_name, image_size * units.Mi) + self._test_format_at_image_size(format_name, image_size * units.Mi, + subformat=subformat) def test_qcow2(self): self._test_format('qcow2') @@ -142,12 +157,15 @@ def test_vhdx(self): def test_vmdk(self): self._test_format('vmdk') - def test_vmdk_bad_descriptor_offset(self): + def test_vmdk_stream_optimized(self): + self._test_format('vmdk', 'streamOptimized') + + def _test_vmdk_bad_descriptor_offset(self, subformat=None): format_name = 'vmdk' image_size = 10 * units.Mi descriptorOffsetAddr = 0x1c BAD_ADDRESS = 0x400 - img = self._create_img(format_name, image_size) + img = self._create_img(format_name, image_size, subformat=subformat) # Corrupt the header fd = open(img, 'r+b') @@ -167,7 +185,13 @@ def test_vmdk_bad_descriptor_offset(self): 'size %i block %i') % (format_name, image_size, block_size)) - def test_vmdk_bad_descriptor_mem_limit(self): + def test_vmdk_bad_descriptor_offset(self): + self._test_vmdk_bad_descriptor_offset() + + def test_vmdk_bad_descriptor_offset_stream_optimized(self): + self._test_vmdk_bad_descriptor_offset(subformat='streamOptimized') + + def _test_vmdk_bad_descriptor_mem_limit(self, subformat=None): format_name = 'vmdk' image_size = 5 * units.Mi virtual_size = 5 * units.Mi @@ -176,7 +200,8 @@ def test_vmdk_bad_descriptor_mem_limit(self): twoMBInSectors = (2 << 20) // 512 # We need a big VMDK because otherwise we will not have enough data to # fill-up the CaptureRegion. - img = self._create_allocated_vmdk(image_size // units.Mi) + img = self._create_allocated_vmdk(image_size // units.Mi, + subformat=subformat) # Corrupt the end of descriptor address so it "ends" at 2MB fd = open(img, 'r+b') @@ -200,6 +225,12 @@ def test_vmdk_bad_descriptor_mem_limit(self): 'Format used more than 1.5MiB of memory: %s' % ( fmt.context_info)) + def test_vmdk_bad_descriptor_mem_limit(self): + self._test_vmdk_bad_descriptor_mem_limit() + + def test_vmdk_bad_descriptor_mem_limit_stream_optimized(self): + self._test_vmdk_bad_descriptor_mem_limit(subformat='streamOptimized') + def test_vdi(self): self._test_format('vdi') From f7f7249b343bb7c1f6425a3dc11d926ecbbd40ba Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Mon, 1 Apr 2024 08:06:31 -0700 Subject: [PATCH 7/9] Reject qcow files with data-file attributes Change-Id: I6326a3e85c1ba4cb1da944a4323769f2399ed2c1 Closes-Bug: #2059809 (cherry picked from commit 2ca29af4433e9fa99a0a48e230d8d25d6eaa4a87) (cherry picked from commit c3586f3a122f6cb0663217b12b52203e74e2e4fa) (cherry picked from commit a92c438fb5ba55440b38cae7c8b4361b58daa9dd) (cherry picked from commit dba3bdb458aa8a5d0193f12b7f1e374a89ed34a2) (cherry picked from commit 6a38aef8baaf5caecbd8c866f1cf922d939dfbcc) --- glance/async_/flows/base_import.py | 10 ++++++ .../async_/flows/plugins/image_conversion.py | 8 +++++ .../flows/plugins/test_image_conversion.py | 16 +++++++++ glance/tests/unit/async_/flows/test_import.py | 33 +++++++++++++++++++ 4 files changed, 67 insertions(+) diff --git a/glance/async_/flows/base_import.py b/glance/async_/flows/base_import.py index e6bb526b4d..c0e2b72839 100644 --- a/glance/async_/flows/base_import.py +++ b/glance/async_/flows/base_import.py @@ -181,6 +181,16 @@ def execute(self, image_id): 'bfile': backing_file} raise RuntimeError(msg) + try: + data_file = metadata['format-specific']['data']['data-file'] + except KeyError: + data_file = None + if data_file is not None: + msg = _("File %(path)s has invalid data-file " + "%(dfile)s, aborting.") % {"path": path, + "dfile": data_file} + raise RuntimeError(msg) + return path def revert(self, image_id, result, **kwargs): diff --git a/glance/async_/flows/plugins/image_conversion.py b/glance/async_/flows/plugins/image_conversion.py index f817e80985..91e8cf944e 100644 --- a/glance/async_/flows/plugins/image_conversion.py +++ b/glance/async_/flows/plugins/image_conversion.py @@ -122,6 +122,14 @@ def _execute(self, action, file_path, **kwargs): raise RuntimeError( 'QCOW images with backing files are not allowed') + try: + data_file = metadata['format-specific']['data']['data-file'] + except KeyError: + data_file = None + if data_file is not None: + raise RuntimeError( + 'QCOW images with data-file set are not allowed') + if metadata.get('format') == 'vmdk': create_type = metadata.get( 'format-specific', {}).get( diff --git a/glance/tests/unit/async_/flows/plugins/test_image_conversion.py b/glance/tests/unit/async_/flows/plugins/test_image_conversion.py index 2056f5b0ef..44ea5ac28d 100644 --- a/glance/tests/unit/async_/flows/plugins/test_image_conversion.py +++ b/glance/tests/unit/async_/flows/plugins/test_image_conversion.py @@ -185,6 +185,22 @@ def test_image_convert_invalid_qcow(self): self.assertEqual('QCOW images with backing files are not allowed', str(e)) + def test_image_convert_invalid_qcow_data_file(self): + data = {'format': 'qcow2', + 'format-specific': { + 'data': { + 'data-file': '/etc/hosts', + }, + }} + + convert = self._setup_image_convert_info_fail() + with mock.patch.object(processutils, 'execute') as exc_mock: + exc_mock.return_value = json.dumps(data), '' + e = self.assertRaises(RuntimeError, + convert.execute, 'file:///test/path.qcow') + self.assertEqual('QCOW images with data-file set are not allowed', + str(e)) + def _test_image_convert_invalid_vmdk(self): data = {'format': 'vmdk', 'format-specific': { diff --git a/glance/tests/unit/async_/flows/test_import.py b/glance/tests/unit/async_/flows/test_import.py index 79f6b6de57..55d6f09283 100644 --- a/glance/tests/unit/async_/flows/test_import.py +++ b/glance/tests/unit/async_/flows/test_import.py @@ -178,6 +178,39 @@ def create_image(*args, **kwargs): self.assertFalse(os.path.exists(tmp_image_path)) self.assertTrue(os.path.exists(image_path)) + def test_import_flow_invalid_data_file(self): + self.config(engine_mode='serial', + group='taskflow_executor') + + img_factory = mock.MagicMock() + + executor = taskflow_executor.TaskExecutor( + self.context, + self.task_repo, + self.img_repo, + img_factory) + + self.task_repo.get.return_value = self.task + + def create_image(*args, **kwargs): + kwargs['image_id'] = UUID1 + return self.img_factory.new_image(*args, **kwargs) + + self.img_repo.get.return_value = self.image + img_factory.new_image.side_effect = create_image + + with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: + dmock.return_value = io.BytesIO(b"TEST_IMAGE") + + with mock.patch.object(putils, 'trycmd') as tmock: + out = json.dumps({'format-specific': + {'data': {'data-file': 'somefile'}}}) + tmock.return_value = (out, '') + e = self.assertRaises(RuntimeError, + executor.begin_processing, + self.task.task_id) + self.assertIn('somefile', str(e)) + def test_import_flow_revert_import_to_fs(self): self.config(engine_mode='serial', group='taskflow_executor') From a28d96bb8ca65e50be1efea3f3c0db82eed0bb62 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Tue, 16 Apr 2024 10:29:10 -0700 Subject: [PATCH 8/9] Extend format_inspector for QCOW safety This adds two properties to the QcowInspector that makes it able to indicate whether the file specifies a backing_file or data_file in the header. Both conditions are considered unsafe for our usage. To ease checking of this condition, a classmethod is added that takes a local filename and digests just enough of the file to assert that both conditions are false. Note for yoga backport:With older qemu installed one of the qemu-img create commands fails, let's skip it from unmaintained/yoga and below that. Change-Id: Iaf86b525397d41bd116999cabe0954a0a7efac65 Related-Bug: #2059809 (cherry picked from commit ae536bb394793c9a7a219cb498e03d5c81dbbbb7) (cherry picked from commit 2eba54e0821106097dfeceb424e53943fd090483) (cherry picked from commit 89dbbc838d606f461087e1494d19ddbcf9db0a38) (cherry picked from commit 4860024286256b028fabc5ed50274934c3dfdd8a) (cherry picked from commit f32d5b8ad865113d499a36f7507a085f583514f9) --- glance/common/format_inspector.py | 132 +++++++++++++++++- .../unit/common/test_format_inspector.py | 82 ++++++++++- 2 files changed, 209 insertions(+), 5 deletions(-) diff --git a/glance/common/format_inspector.py b/glance/common/format_inspector.py index d9576f1f8d..32f048c3f3 100755 --- a/glance/common/format_inspector.py +++ b/glance/common/format_inspector.py @@ -28,6 +28,14 @@ LOG = logging.getLogger(__name__) +def chunked_reader(fileobj, chunk_size=512): + while True: + chunk = fileobj.read(chunk_size) + if not chunk: + break + yield chunk + + class CaptureRegion(object): """Represents a region of a file we want to capture. @@ -176,10 +184,16 @@ def virtual_size(self): @property def actual_size(self): """Returns the total size of the file, usually smaller than - virtual_size. + virtual_size. NOTE: this will only be accurate if the entire + file is read and processed. """ return self._total_count + @property + def complete(self): + """Returns True if we have all the information needed.""" + return all(r.complete for r in self._capture_regions.values()) + def __str__(self): """The string name of this file format.""" return 'raw' @@ -194,6 +208,35 @@ def context_info(self): return {name: len(region.data) for name, region in self._capture_regions.items()} + @classmethod + def from_file(cls, filename): + """Read as much of a file as necessary to complete inspection. + + NOTE: Because we only read as much of the file as necessary, the + actual_size property will not reflect the size of the file, but the + amount of data we read before we satisfied the inspector. + + Raises ImageFormatError if we cannot parse the file. + """ + inspector = cls() + with open(filename, 'rb') as f: + for chunk in chunked_reader(f): + inspector.eat_chunk(chunk) + if inspector.complete: + # No need to eat any more data + break + if not inspector.complete or not inspector.format_match: + raise ImageFormatError('File is not in requested format') + return inspector + + def safety_check(self): + """Perform some checks to determine if this file is safe. + + Returns True if safe, False otherwise. It may raise ImageFormatError + if safety cannot be guaranteed because of parsing or other errors. + """ + return True + # The qcow2 format consists of a big-endian 72-byte header, of which # only a small portion has information we care about: @@ -202,15 +245,26 @@ def context_info(self): # 0 0x00 Magic 4-bytes 'QFI\xfb' # 4 0x04 Version (uint32_t, should always be 2 for modern files) # . . . +# 8 0x08 Backing file offset (uint64_t) # 24 0x18 Size in bytes (unint64_t) +# . . . +# 72 0x48 Incompatible features bitfield (6 bytes) # -# https://people.gnome.org/~markmc/qcow-image-format.html +# https://gitlab.com/qemu-project/qemu/-/blob/master/docs/interop/qcow2.txt class QcowInspector(FileInspector): """QEMU QCOW2 Format This should only require about 32 bytes of the beginning of the file - to determine the virtual size. + to determine the virtual size, and 104 bytes to perform the safety check. """ + + BF_OFFSET = 0x08 + BF_OFFSET_LEN = 8 + I_FEATURES = 0x48 + I_FEATURES_LEN = 8 + I_FEATURES_DATAFILE_BIT = 3 + I_FEATURES_MAX_BIT = 4 + def __init__(self, *a, **k): super(QcowInspector, self).__init__(*a, **k) self.new_region('header', CaptureRegion(0, 512)) @@ -220,6 +274,10 @@ def _qcow_header_data(self): struct.unpack('>4sIQIIQ', self.region('header').data[:32])) return magic, size + @property + def has_header(self): + return self.region('header').complete + @property def virtual_size(self): if not self.region('header').complete: @@ -236,9 +294,77 @@ def format_match(self): magic, size = self._qcow_header_data() return magic == b'QFI\xFB' + @property + def has_backing_file(self): + if not self.region('header').complete: + return None + if not self.format_match: + return False + bf_offset_bytes = self.region('header').data[ + self.BF_OFFSET:self.BF_OFFSET + self.BF_OFFSET_LEN] + # nonzero means "has a backing file" + bf_offset, = struct.unpack('>Q', bf_offset_bytes) + return bf_offset != 0 + + @property + def has_unknown_features(self): + if not self.region('header').complete: + return None + if not self.format_match: + return False + i_features = self.region('header').data[ + self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN] + + # This is the maximum byte number we should expect any bits to be set + max_byte = self.I_FEATURES_MAX_BIT // 8 + + # The flag bytes are in big-endian ordering, so if we process + # them in index-order, they're reversed + for i, byte_num in enumerate(reversed(range(self.I_FEATURES_LEN))): + if byte_num == max_byte: + # If we're in the max-allowed byte, allow any bits less than + # the maximum-known feature flag bit to be set + allow_mask = ((1 << self.I_FEATURES_MAX_BIT) - 1) + elif byte_num > max_byte: + # If we're above the byte with the maximum known feature flag + # bit, then we expect all zeroes + allow_mask = 0x0 + else: + # Any earlier-than-the-maximum byte can have any of the flag + # bits set + allow_mask = 0xFF + + if i_features[i] & ~allow_mask: + LOG.warning('Found unknown feature bit in byte %i: %s/%s', + byte_num, bin(i_features[byte_num] & ~allow_mask), + bin(allow_mask)) + return True + + return False + + @property + def has_data_file(self): + if not self.region('header').complete: + return None + if not self.format_match: + return False + i_features = self.region('header').data[ + self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN] + + # First byte of bitfield, which is i_features[7] + byte = self.I_FEATURES_LEN - 1 - self.I_FEATURES_DATAFILE_BIT // 8 + # Third bit of bitfield, which is 0x04 + bit = 1 << (self.I_FEATURES_DATAFILE_BIT - 1 % 8) + return bool(i_features[byte] & bit) + def __str__(self): return 'qcow2' + def safety_check(self): + return (not self.has_backing_file and + not self.has_data_file and + not self.has_unknown_features) + # The VHD (or VPC as QEMU calls it) format consists of a big-endian # 512-byte "footer" at the beginning of the file with various diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py index 38f8caeb41..7199299ce8 100644 --- a/glance/tests/unit/common/test_format_inspector.py +++ b/glance/tests/unit/common/test_format_inspector.py @@ -51,18 +51,28 @@ def tearDown(self): except Exception: pass - def _create_img(self, fmt, size, subformat=None): + def _create_img(self, fmt, size, subformat=None, options=None, + backing_file=None): if fmt == 'vhd': # QEMU calls the vhd format vpc fmt = 'vpc' + if options is None: + options = {} opt = '' prefix = 'glance-unittest-formatinspector-' if subformat: - opt = ' -o subformat=%s' % subformat + options['subformat'] = subformat prefix += subformat + '-' + if options: + opt += '-o ' + ','.join('%s=%s' % (k, v) + for k, v in options.items()) + + if backing_file is not None: + opt += ' -b %s -F raw' % backing_file + fn = tempfile.mktemp(prefix=prefix, suffix='.%s' % fmt) self._created_files.append(fn) @@ -160,6 +170,15 @@ def test_vmdk(self): def test_vmdk_stream_optimized(self): self._test_format('vmdk', 'streamOptimized') + def test_from_file_reads_minimum(self): + img = self._create_img('qcow2', 10 * units.Mi) + file_size = os.stat(img).st_size + fmt = format_inspector.QcowInspector.from_file(img) + # We know everything we need from the first 512 bytes of a QCOW image, + # so make sure that we did not read the whole thing when we inspect + # a local file. + self.assertLess(fmt.actual_size, file_size) + def _test_vmdk_bad_descriptor_offset(self, subformat=None): format_name = 'vmdk' image_size = 10 * units.Mi @@ -231,6 +250,65 @@ def test_vmdk_bad_descriptor_mem_limit(self): def test_vmdk_bad_descriptor_mem_limit_stream_optimized(self): self._test_vmdk_bad_descriptor_mem_limit(subformat='streamOptimized') + def test_qcow2_safety_checks(self): + # Create backing and data-file names (and initialize the backing file) + backing_fn = tempfile.mktemp(prefix='backing') + self._created_files.append(backing_fn) + with open(backing_fn, 'w') as f: + f.write('foobar') + data_fn = tempfile.mktemp(prefix='data') + self._created_files.append(data_fn) + + # A qcow with no backing or data file is safe + fn = self._create_img('qcow2', 5 * units.Mi, None) + inspector = format_inspector.QcowInspector.from_file(fn) + self.assertTrue(inspector.safety_check()) + + # A backing file makes it unsafe + fn = self._create_img('qcow2', 5 * units.Mi, None, + backing_file=backing_fn) + inspector = format_inspector.QcowInspector.from_file(fn) + self.assertFalse(inspector.safety_check()) + + # Note(lajoskatona): This image create fails on bionic due to + # old qemu-img utilities, let's skip this only test from yoga + # A data-file makes it unsafe + # fn = self._create_img('qcow2', 5 * units.Mi, + # options={'data_file': data_fn, + # 'data_file_raw': 'on'}) + # inspector = format_inspector.QcowInspector.from_file(fn) + # self.assertFalse(inspector.safety_check()) + + # Trying to load a non-QCOW file is an error + self.assertRaises(format_inspector.ImageFormatError, + format_inspector.QcowInspector.from_file, + backing_fn) + + def test_qcow2_feature_flag_checks(self): + data = bytearray(512) + data[0:4] = b'QFI\xFB' + inspector = format_inspector.QcowInspector() + inspector.region('header').data = data + + # All zeros, no feature flags - all good + self.assertFalse(inspector.has_unknown_features) + + # A feature flag set in the first byte (highest-order) is not + # something we know about, so fail. + data[0x48] = 0x01 + self.assertTrue(inspector.has_unknown_features) + + # The first bit in the last byte (lowest-order) is known (the dirty + # bit) so that should pass + data[0x48] = 0x00 + data[0x4F] = 0x01 + self.assertFalse(inspector.has_unknown_features) + + # Currently (as of 2024), the high-order feature flag bit in the low- + # order byte is not assigned, so make sure we reject it. + data[0x4F] = 0x80 + self.assertTrue(inspector.has_unknown_features) + def test_vdi(self): self._test_format('vdi') From 20aeba3892b6d25cc8051d72fbfe09d6456e6f68 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Thu, 18 Apr 2024 14:51:52 -0700 Subject: [PATCH 9/9] Add VMDK safety check This makes us check the extent filenames to make sure they don't have any banned characters in them (i.e. slashes). It also makes us reject VMDK files with a footer. Since we process these files as a stream, we can't honor a footer that directs us to find the descriptor block in a location we've already processed. Thus, if a file indicates it has a footer, consider it a policy exception and unsupported. Change-Id: I4a1c6dff7854c49940a0ac7988722aa6befc04fa (cherry picked from commit c1bf35dffb7f4c3090b1f04cf0e27cb431330c3e) (cherry picked from commit d3f1d6159c0218ac01e8d881e2ec4da71fc952ee) (cherry picked from commit 2dd4d138d4b8e1d9ca69fc0dda3711553a65d912) (cherry picked from commit 812e56d19f456e99e2277b99046330e1ad6a4ca7) (cherry picked from commit 05650e16f1dc6fde71b51e9e9bd9494226047c4a) --- glance/common/format_inspector.py | 64 ++++++++++++++++++- .../unit/common/test_format_inspector.py | 56 ++++++++++++++++ 2 files changed, 118 insertions(+), 2 deletions(-) diff --git a/glance/common/format_inspector.py b/glance/common/format_inspector.py index 32f048c3f3..a11ff1a5e0 100755 --- a/glance/common/format_inspector.py +++ b/glance/common/format_inspector.py @@ -650,6 +650,7 @@ class VMDKInspector(FileInspector): # at 0x200 and 1MB - 1 DESC_OFFSET = 0x200 DESC_MAX_SIZE = (1 << 20) - 1 + GD_AT_END = 0xffffffffffffffff def __init__(self, *a, **k): super(VMDKInspector, self).__init__(*a, **k) @@ -662,8 +663,9 @@ def post_process(self): if not self.region('header').complete: return - sig, ver, _flags, _sectors, _grain, desc_sec, desc_num = struct.unpack( - '<4sIIQQQQ', self.region('header').data[:44]) + (sig, ver, _flags, _sectors, _grain, desc_sec, desc_num, + _numGTEsperGT, _rgdOffset, gdOffset) = struct.unpack( + '<4sIIQQQQIQQ', self.region('header').data[:64]) if sig != b'KDMV': raise ImageFormatError('Signature KDMV not found: %r' % sig) @@ -671,6 +673,11 @@ def post_process(self): if ver not in (1, 2, 3): raise ImageFormatError('Unsupported format version %i' % ver) + if gdOffset == self.GD_AT_END: + # This means we have a footer, which takes precedence over the + # header, which we cannot support since we stream. + raise ImageFormatError('Unsupported VMDK footer') + # Since we parse both desc_sec and desc_num (the location of the # VMDK's descriptor, expressed in 512 bytes sectors) we enforce a # check on the bounds to create a reasonable CaptureRegion. This @@ -718,6 +725,59 @@ def virtual_size(self): return sectors * 512 + def safety_check(self): + if (not self.has_region('descriptor') or + not self.region('descriptor').complete): + return False + + try: + # Descriptor is padded to 512 bytes + desc_data = self.region('descriptor').data.rstrip(b'\x00') + # Descriptor is actually case-insensitive ASCII text + desc_text = desc_data.decode('ascii').lower() + except UnicodeDecodeError: + LOG.error('VMDK descriptor failed to decode as ASCII') + raise ImageFormatError('Invalid VMDK descriptor data') + + extent_access = ('rw', 'rdonly', 'noaccess') + header_fields = [] + extents = [] + ddb = [] + + # NOTE(danms): Cautiously parse the VMDK descriptor. Each line must + # be something we understand, otherwise we refuse it. + for line in [x.strip() for x in desc_text.split('\n')]: + if line.startswith('#') or not line: + # Blank or comment lines are ignored + continue + elif line.startswith('ddb'): + # DDB lines are allowed (but not used by us) + ddb.append(line) + elif '=' in line and ' ' not in line.split('=')[0]: + # Header fields are a single word followed by an '=' and some + # value + header_fields.append(line) + elif line.split(' ')[0] in extent_access: + # Extent lines start with one of the three access modes + extents.append(line) + else: + # Anything else results in a rejection + LOG.error('Unsupported line %r in VMDK descriptor', line) + raise ImageFormatError('Invalid VMDK descriptor data') + + # Check all the extent lines for concerning content + for extent_line in extents: + if '/' in extent_line: + LOG.error('Extent line %r contains unsafe characters', + extent_line) + return False + + if not extents: + LOG.error('VMDK file specified no extents') + return False + + return True + def __str__(self): return 'vmdk' diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py index 7199299ce8..ceb0c3caab 100644 --- a/glance/tests/unit/common/test_format_inspector.py +++ b/glance/tests/unit/common/test_format_inspector.py @@ -309,6 +309,62 @@ def test_qcow2_feature_flag_checks(self): data[0x4F] = 0x80 self.assertTrue(inspector.has_unknown_features) + def test_vmdk_safety_checks(self): + region = format_inspector.CaptureRegion(0, 0) + inspector = format_inspector.VMDKInspector() + inspector.new_region('descriptor', region) + + # This should be a legit VMDK descriptor which comments, blank lines, + # an extent, some ddb content, and some header values. + legit_desc = ['# This is a comment', + '', + ' ', + 'createType=monolithicSparse', + 'RW 1234 SPARSE "foo.vmdk"', + 'ddb.adapterType = "MFM', + '# EOF'] + region.data = ('\n'.join(legit_desc)).encode('ascii') + region.length = len(region.data) + self.assertTrue(inspector.safety_check()) + + # Any of these lines should trigger an error indicating that there is + # something in the descriptor we don't understand + bad_lines = [ + '#\U0001F4A9', + 'header Name=foo', + 'foo bar', + 'WR 123 SPARSE "foo.vmdk"', + ] + + for bad_line in bad_lines: + # Encode as UTF-8 purely so we can test that anything non-ASCII + # will trigger the decode check + region.data = bad_line.encode('utf-8') + region.length = len(region.data) + self.assertRaisesRegex(format_inspector.ImageFormatError, + 'Invalid VMDK descriptor', + inspector.safety_check) + + # Extents with slashes in the name fail the safety check + region.data = b'RW 123 SPARSE "/etc/shadow"' + region.length = len(region.data) + self.assertFalse(inspector.safety_check()) + + # A descriptor that specifies no extents fails the safety check + region.data = b'# Nothing' + region.length = len(region.data) + self.assertFalse(inspector.safety_check()) + + def test_vmdk_reject_footer(self): + data = struct.pack('<4sIIQQQQIQQ', b'KDMV', 3, 0, 0, 0, 0, 1, 0, 0, + format_inspector.VMDKInspector.GD_AT_END) + inspector = format_inspector.VMDKInspector() + inspector.region('header').data = data + inspector.region('header').length = len(data) + self.assertRaisesRegex(format_inspector.ImageFormatError, + 'footer', + inspector.post_process) + def test_vdi(self): self._test_format('vdi')