From e9c7dbf1e63b58b1c911f11173e2d151961cde0b Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Fri, 17 Jan 2025 09:11:29 +0000 Subject: [PATCH 1/2] Remove check for already uploaded release files Airlock prevents users from adding an already released (via Airlock) file to a release request. However, currently Airlock only knows about files released via Airlock. If a file has been released pre-Airlock, Airlock will allow it to released, and will error at job-server's check for already uploaded files. This will also allow us to get_or_create the release, so Airlock can attempt to re-release if something went wrong during file uploads. --- jobserver/api/releases.py | 9 +++----- jobserver/releases.py | 17 --------------- tests/unit/jobserver/test_releases.py | 31 +++------------------------ 3 files changed, 6 insertions(+), 51 deletions(-) diff --git a/jobserver/api/releases.py b/jobserver/api/releases.py index 808f5b216..19bc5dac7 100644 --- a/jobserver/api/releases.py +++ b/jobserver/api/releases.py @@ -302,12 +302,9 @@ def post(self, request, workspace_name): metadata = serializer.validated_data["metadata"] release_id = metadata.pop("airlock_id", None) - try: - release = releases.create_release( - workspace, backend, user, files, metadata=metadata, id=release_id - ) - except releases.ReleaseFileAlreadyExists as exc: - raise ValidationError({"detail": str(exc)}) + release = releases.create_release( + workspace, backend, user, files, metadata=metadata, id=release_id + ) slacks.notify_release_created(release) diff --git a/jobserver/releases.py b/jobserver/releases.py index ff6bd3362..e663832a7 100644 --- a/jobserver/releases.py +++ b/jobserver/releases.py @@ -69,25 +69,8 @@ def build_outputs_zip(release_files, url_builder_func): return in_memory_zf -def check_not_already_uploaded(workspace, filename, filehash, backend): - """Check if this workspace/filename/filehash combination has been uploaded before.""" - duplicate = ReleaseFile.objects.filter( - release__backend=backend, - workspace=workspace, - name=filename, - filehash=filehash, - ) - if duplicate.exists(): - raise ReleaseFileAlreadyExists( - f"This version of '{filename}' in workspace {workspace} has already been uploaded from backend '{backend.slug}'" - ) - - @transaction.atomic def create_release(workspace, backend, created_by, requested_files, **kwargs): - for f in requested_files: - check_not_already_uploaded(workspace, f["name"], f["sha256"], backend) - release = Release.objects.create( workspace=workspace, backend=backend, diff --git a/tests/unit/jobserver/test_releases.py b/tests/unit/jobserver/test_releases.py index 408ba171c..8d70d1dae 100644 --- a/tests/unit/jobserver/test_releases.py +++ b/tests/unit/jobserver/test_releases.py @@ -83,7 +83,7 @@ def test_build_outputs_zip_with_missing_files(build_release_with_files): assert "This file was redacted by" in zipped_contents, zipped_contents -def test_create_release_reupload(): +def test_create_release_reupload_allowed(): workspace = WorkspaceFactory(name="workspace") rfile = ReleaseFileFactory(workspace=workspace, name="file1.txt", filehash="hash") @@ -99,38 +99,13 @@ def test_create_release_reupload(): } ] - with pytest.raises(releases.ReleaseFileAlreadyExists): - releases.create_release( - workspace, - rfile.release.backend, - rfile.release.created_by, - files, - ) - - -def test_create_release_reupload_to_different_workspace(): - workspace = WorkspaceFactory(name="workspace") - another_workspace = WorkspaceFactory(name="another_workspace") - rfile = ReleaseFileFactory(workspace=workspace, name="file1.txt", filehash="hash") - - files = [ - { - "name": "file1.txt", - "path": "path/to/file1.txt", - "url": "", - "size": 4, - "sha256": "hash", - "date": "2022-08-17T13:37Z", - "metadata": {}, - } - ] - release = releases.create_release( - another_workspace, + workspace, rfile.release.backend, rfile.release.created_by, files, ) + assert release.requested_files == files assert release.files.count() == 1 From f06a6623585dcc8a7ccbf4cc0695df9163261415 Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Fri, 17 Jan 2025 09:55:22 +0000 Subject: [PATCH 2/2] Get or create release Check all the data passed to the create_release endpoint and return an existing release, if one matches. Allow for the endpoint to be called by a different user (in the event that a release is created by one output-checker, and then a re-release attempt is made by another output-checker). This can happen if not all files successfully upload from Airlock. The create_release is wrapped in transaction.atomic, so if we found and existing matching release, we should be able to safely assume that all the request files have already been created. --- jobserver/releases.py | 36 +++--- tests/unit/jobserver/api/test_releases.py | 141 ++++++++++++++++++---- tests/unit/jobserver/test_releases.py | 31 +++++ 3 files changed, 170 insertions(+), 38 deletions(-) diff --git a/jobserver/releases.py b/jobserver/releases.py index e663832a7..575b67fd6 100644 --- a/jobserver/releases.py +++ b/jobserver/releases.py @@ -71,25 +71,33 @@ def build_outputs_zip(release_files, url_builder_func): @transaction.atomic def create_release(workspace, backend, created_by, requested_files, **kwargs): - release = Release.objects.create( + release_id = kwargs.pop("id", None) + create_kwargs = dict( workspace=workspace, backend=backend, - created_by=created_by, requested_files=requested_files, - **kwargs, + defaults={ + "created_by": created_by, + **kwargs, + }, ) + if release_id is not None: + create_kwargs["id"] = release_id - for f in requested_files: - ReleaseFile.objects.create( - release=release, - workspace=release.workspace, - created_by=created_by, - name=f["name"], - filehash=f["sha256"], - size=f["size"], - mtime=f["date"], - metadata=f["metadata"], - ) + release, created = Release.objects.get_or_create(**create_kwargs) + + if created: + for f in requested_files: + ReleaseFile.objects.create( + release=release, + workspace=release.workspace, + created_by=created_by, + name=f["name"], + filehash=f["sha256"], + size=f["size"], + mtime=f["date"], + metadata=f["metadata"], + ) return release diff --git a/tests/unit/jobserver/api/test_releases.py b/tests/unit/jobserver/api/test_releases.py index 7878ed10a..8087c58ca 100644 --- a/tests/unit/jobserver/api/test_releases.py +++ b/tests/unit/jobserver/api/test_releases.py @@ -816,35 +816,79 @@ def test_releaseworkspaceapi_post_create_release_with_oversized_file( assert response.data["files"][0]["size"][0].startswith("File size should be <16Mb.") -def test_releaseworkspaceapi_post_release_already_exists(api_rf, project_membership): - user = UserFactory(roles=[OutputChecker]) - release = ReleaseFactory() - rfile = ReleaseFileFactory( - release=release, - workspace=release.workspace, - created_by=user, - name="file.txt", - filehash="hash", +def test_releaseworkspaceapi_post_release_already_exists( + api_rf, slack_messages, project_membership +): + user = UserFactory(roles=[OutputChecker], username="test-output-checker") + files = [ + { + "name": "file.txt", + "url": "url", + "size": 7, + "sha256": "hash", + "date": timezone.now(), + "metadata": {}, + "review": None, + } + ] + release = ReleaseFactory(requested_files=files, created_by=user) + BackendMembershipFactory(backend=release.backend, user=user) + project_membership(project=release.workspace.project, user=user) + + data = { + "files": files, + "metadata": {}, + "review": {}, + } + + # Create release via API with same data + request = api_rf.post( + "/", + data=data, + format="json", + headers={ + "authorization": release.backend.auth_token, + "os-user": user.username, + }, + ) + + response = ReleaseWorkspaceAPI.as_view(get_github_api=FakeGitHubAPI)( + request, workspace_name=release.workspace.name ) + assert response.status_code == 201 + assert Release.objects.count() == 1 + assert Release.objects.latest("id").id == release.id + + +def test_releaseworkspaceapi_post_release_already_exists_with_airlock_id( + api_rf, slack_messages, project_membership +): + user = UserFactory(roles=[OutputChecker], username="test-output-checker") + files = [ + { + "name": "file.txt", + "url": "url", + "size": 7, + "sha256": "hash", + "date": timezone.now(), + "metadata": {}, + "review": None, + } + ] + release = ReleaseFactory( + requested_files=files, created_by=user, id="01AAA1AAAAAAA1AAAAA11A1AAA" + ) BackendMembershipFactory(backend=release.backend, user=user) project_membership(project=release.workspace.project, user=user) data = { - "files": [ - { - "name": rfile.name, - "url": "url", - "size": 7, - "sha256": rfile.filehash, - "date": timezone.now(), - "metadata": {}, - "review": None, - } - ], - "metadata": {}, + "files": files, + "metadata": {"tool": "airlock", "airlock_id": "01AAA1AAAAAAA1AAAAA11A1AAA"}, "review": {}, } + + # Create release via API with same data request = api_rf.post( "/", data=data, @@ -859,9 +903,58 @@ def test_releaseworkspaceapi_post_release_already_exists(api_rf, project_members request, workspace_name=release.workspace.name ) - assert response.status_code == 400 - assert "file.txt" in response.data["detail"] - assert "already been uploaded" in response.data["detail"] + assert response.status_code == 201 + assert Release.objects.count() == 1 + assert Release.objects.latest("id").id == "01AAA1AAAAAAA1AAAAA11A1AAA" + + +def test_releaseworkspaceapi_post_release_already_created_by_another_user( + api_rf, slack_messages, project_membership +): + user = UserFactory(roles=[OutputChecker], username="test-output-checker") + files = [ + { + "name": "file.txt", + "url": "url", + "size": 7, + "sha256": "hash", + "date": timezone.now(), + "metadata": {}, + "review": None, + } + ] + release = ReleaseFactory(requested_files=files, created_by=user) + BackendMembershipFactory(backend=release.backend, user=user) + project_membership(project=release.workspace.project, user=user) + + data = { + "files": files, + "metadata": {}, + "review": {}, + } + + other_user = UserFactory(roles=[OutputChecker], username="other-output-checker") + BackendMembershipFactory(backend=release.backend, user=other_user) + # Create release via API with same data but different user + request = api_rf.post( + "/", + data=data, + format="json", + headers={ + "authorization": release.backend.auth_token, + "os-user": other_user.username, + }, + ) + + response = ReleaseWorkspaceAPI.as_view(get_github_api=FakeGitHubAPI)( + request, workspace_name=release.workspace.name + ) + + assert response.status_code == 201 + assert Release.objects.count() == 1 + latest_release = Release.objects.latest("id") + assert latest_release.id == release.id + assert latest_release.created_by == user def test_releaseworkspaceapi_post_unknown_workspace(api_rf): diff --git a/tests/unit/jobserver/test_releases.py b/tests/unit/jobserver/test_releases.py index 8d70d1dae..1195a7044 100644 --- a/tests/unit/jobserver/test_releases.py +++ b/tests/unit/jobserver/test_releases.py @@ -114,6 +114,37 @@ def test_create_release_reupload_allowed(): rfile.size == 4 +def test_create_release_already_exists(): + workspace = WorkspaceFactory(name="workspace") + rfile = ReleaseFileFactory(workspace=workspace, name="file1.txt", filehash="hash") + + files = [ + { + "name": "file1.txt", + "path": "path/to/file1.txt", + "url": "", + "size": 4, + "sha256": "hash", + "date": "2022-08-17T13:37Z", + "metadata": {}, + } + ] + + release = releases.create_release( + workspace, + rfile.release.backend, + rfile.release.created_by, + files, + ) + + assert release.requested_files == files + assert release.files.count() == 1 + + rfile = release.files.first() + rfile.filehash == "hash" + rfile.size == 4 + + def test_create_release_success(): backend = BackendFactory() workspace = WorkspaceFactory()