From df70498346dc7e39fc6f682248d1f2dfad11d08e Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Fri, 17 Jan 2025 17:00:47 +0000 Subject: [PATCH] Check existing releases by ID Releases should always be created by Airlock now, with an airlock id which is used as the release ID. We can filter for existing releases on release ID, and avoid filtering with requested_files, which is a json field and filtering works differently. If we find an existing release with the airlock ID, we check the shas of the requested_files match. --- jobserver/releases.py | 24 ++++++++-- tests/unit/jobserver/api/test_releases.py | 16 ++++--- tests/unit/jobserver/test_releases.py | 56 ++++++++++++++++------- 3 files changed, 69 insertions(+), 27 deletions(-) diff --git a/jobserver/releases.py b/jobserver/releases.py index 575b67fd6..d53166c68 100644 --- a/jobserver/releases.py +++ b/jobserver/releases.py @@ -23,6 +23,10 @@ class ReleaseFileHashMismatch(Exception): pass +class ReleaseFilesMismatch(Exception): + pass + + def _build_paths(release, filename, data): """ Build the absolute path for a given filename as part of a release @@ -71,20 +75,27 @@ def build_outputs_zip(release_files, url_builder_func): @transaction.atomic def create_release(workspace, backend, created_by, requested_files, **kwargs): + # If this release creation comes from Airlock, a release id will be specified in the kwargs release_id = kwargs.pop("id", None) create_kwargs = dict( workspace=workspace, backend=backend, - requested_files=requested_files, defaults={ "created_by": created_by, + "requested_files": requested_files, **kwargs, }, ) + if release_id is not None: + # We know the release id, check for an existing release create_kwargs["id"] = release_id - - release, created = Release.objects.get_or_create(**create_kwargs) + release, created = Release.objects.get_or_create(**create_kwargs) + else: + created = True + release = Release.objects.create( + workspace=workspace, backend=backend, **create_kwargs["defaults"] + ) if created: for f in requested_files: @@ -98,6 +109,13 @@ def create_release(workspace, backend, created_by, requested_files, **kwargs): mtime=f["date"], metadata=f["metadata"], ) + else: + requested_files_shas = {rf["sha256"] for rf in requested_files} + existing_release_files_shas = {rf["sha256"] for rf in release.requested_files} + if requested_files_shas != existing_release_files_shas: + raise ReleaseFilesMismatch( + f"Requested files do not match files in existing release {release_id}'" + ) return release diff --git a/tests/unit/jobserver/api/test_releases.py b/tests/unit/jobserver/api/test_releases.py index 8087c58ca..375812368 100644 --- a/tests/unit/jobserver/api/test_releases.py +++ b/tests/unit/jobserver/api/test_releases.py @@ -1,6 +1,7 @@ import json import random import string +from datetime import UTC, datetime import pytest from django.contrib.auth.models import AnonymousUser @@ -816,7 +817,7 @@ def test_releaseworkspaceapi_post_create_release_with_oversized_file( assert response.data["files"][0]["size"][0].startswith("File size should be <16Mb.") -def test_releaseworkspaceapi_post_release_already_exists( +def test_releaseworkspaceapi_post_release_already_exists_no_airlock_id( api_rf, slack_messages, project_membership ): user = UserFactory(roles=[OutputChecker], username="test-output-checker") @@ -841,7 +842,7 @@ def test_releaseworkspaceapi_post_release_already_exists( "review": {}, } - # Create release via API with same data + # Create release via API with same data. There's no id to check against, so a new release is created request = api_rf.post( "/", data=data, @@ -857,8 +858,7 @@ def test_releaseworkspaceapi_post_release_already_exists( ) assert response.status_code == 201 - assert Release.objects.count() == 1 - assert Release.objects.latest("id").id == release.id + assert Release.objects.count() == 2 def test_releaseworkspaceapi_post_release_already_exists_with_airlock_id( @@ -871,7 +871,7 @@ def test_releaseworkspaceapi_post_release_already_exists_with_airlock_id( "url": "url", "size": 7, "sha256": "hash", - "date": timezone.now(), + "date": datetime(2025, 1, 15, 10, 30, 35, 99999, tzinfo=UTC), "metadata": {}, "review": None, } @@ -923,13 +923,15 @@ def test_releaseworkspaceapi_post_release_already_created_by_another_user( "review": None, } ] - release = ReleaseFactory(requested_files=files, created_by=user) + release = ReleaseFactory( + requested_files=files, created_by=user, id="01AAA1AAAAAAA1AAAAA11A1AAA" + ) BackendMembershipFactory(backend=release.backend, user=user) project_membership(project=release.workspace.project, user=user) data = { "files": files, - "metadata": {}, + "metadata": {"tool": "airlock", "airlock_id": "01AAA1AAAAAAA1AAAAA11A1AAA"}, "review": {}, } diff --git a/tests/unit/jobserver/test_releases.py b/tests/unit/jobserver/test_releases.py index 1195a7044..f36278530 100644 --- a/tests/unit/jobserver/test_releases.py +++ b/tests/unit/jobserver/test_releases.py @@ -8,10 +8,11 @@ from rest_framework.exceptions import NotFound from jobserver import releases -from jobserver.models import ReleaseFile +from jobserver.models import Release, ReleaseFile from jobserver.models.release_file import absolute_file_path from tests.factories import ( BackendFactory, + ReleaseFactory, ReleaseFileFactory, UserFactory, WorkspaceFactory, @@ -83,9 +84,9 @@ def test_build_outputs_zip_with_missing_files(build_release_with_files): assert "This file was redacted by" in zipped_contents, zipped_contents -def test_create_release_reupload_allowed(): +def test_create_release_already_exists(build_release): workspace = WorkspaceFactory(name="workspace") - rfile = ReleaseFileFactory(workspace=workspace, name="file1.txt", filehash="hash") + backend = BackendFactory() files = [ { @@ -99,13 +100,21 @@ def test_create_release_reupload_allowed(): } ] + existing_release = ReleaseFactory( + requested_files=files, workspace=workspace, backend=backend + ) + ReleaseFileFactory(release=existing_release, name="file1.txt") + assert Release.objects.count() == 1 + release = releases.create_release( workspace, - rfile.release.backend, - rfile.release.created_by, + backend, + existing_release.created_by, files, + id=existing_release.id, ) + assert release.id == existing_release.id assert release.requested_files == files assert release.files.count() == 1 @@ -114,9 +123,9 @@ def test_create_release_reupload_allowed(): rfile.size == 4 -def test_create_release_already_exists(): +def test_create_release_already_exists_file_mismatch(): workspace = WorkspaceFactory(name="workspace") - rfile = ReleaseFileFactory(workspace=workspace, name="file1.txt", filehash="hash") + backend = BackendFactory() files = [ { @@ -130,19 +139,32 @@ def test_create_release_already_exists(): } ] - release = releases.create_release( - workspace, - rfile.release.backend, - rfile.release.created_by, - files, + existing_release = ReleaseFactory( + requested_files=files, workspace=workspace, backend=backend ) + ReleaseFileFactory(release=existing_release, name="file1.txt") + assert Release.objects.count() == 1 - assert release.requested_files == files - assert release.files.count() == 1 + mismatched_files = [ + { + "name": "file2.txt", + "path": "path/to/file1.txt", + "url": "", + "size": 5, + "sha256": "hash1", + "date": "2022-08-17T13:37Z", + "metadata": {}, + } + ] - rfile = release.files.first() - rfile.filehash == "hash" - rfile.size == 4 + with pytest.raises(releases.ReleaseFilesMismatch): + releases.create_release( + workspace, + backend, + existing_release.created_by, + mismatched_files, + id=existing_release.id, + ) def test_create_release_success():