Skip to content

Commit

Permalink
Check existing releases by ID
Browse files Browse the repository at this point in the history
Releases should always be created by Airlock now, with an airlock
id which is used as the release ID. We can filter for existing
releases on release ID, and avoid filtering with requested_files,
which is a json field and filtering works differently.

If we find an existing release with the airlock ID, we check the
shas of the requested_files match.
  • Loading branch information
rebkwok committed Jan 17, 2025
1 parent 6f5c877 commit df70498
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 27 deletions.
24 changes: 21 additions & 3 deletions jobserver/releases.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ class ReleaseFileHashMismatch(Exception):
pass


class ReleaseFilesMismatch(Exception):
pass


def _build_paths(release, filename, data):
"""
Build the absolute path for a given filename as part of a release
Expand Down Expand Up @@ -71,20 +75,27 @@ def build_outputs_zip(release_files, url_builder_func):

@transaction.atomic
def create_release(workspace, backend, created_by, requested_files, **kwargs):
# If this release creation comes from Airlock, a release id will be specified in the kwargs
release_id = kwargs.pop("id", None)
create_kwargs = dict(
workspace=workspace,
backend=backend,
requested_files=requested_files,
defaults={
"created_by": created_by,
"requested_files": requested_files,
**kwargs,
},
)

if release_id is not None:
# We know the release id, check for an existing release
create_kwargs["id"] = release_id

release, created = Release.objects.get_or_create(**create_kwargs)
release, created = Release.objects.get_or_create(**create_kwargs)
else:
created = True
release = Release.objects.create(
workspace=workspace, backend=backend, **create_kwargs["defaults"]
)

if created:
for f in requested_files:
Expand All @@ -98,6 +109,13 @@ def create_release(workspace, backend, created_by, requested_files, **kwargs):
mtime=f["date"],
metadata=f["metadata"],
)
else:
requested_files_shas = {rf["sha256"] for rf in requested_files}
existing_release_files_shas = {rf["sha256"] for rf in release.requested_files}
if requested_files_shas != existing_release_files_shas:
raise ReleaseFilesMismatch(
f"Requested files do not match files in existing release {release_id}'"
)

return release

Expand Down
16 changes: 9 additions & 7 deletions tests/unit/jobserver/api/test_releases.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import random
import string
from datetime import UTC, datetime

import pytest
from django.contrib.auth.models import AnonymousUser
Expand Down Expand Up @@ -816,7 +817,7 @@ def test_releaseworkspaceapi_post_create_release_with_oversized_file(
assert response.data["files"][0]["size"][0].startswith("File size should be <16Mb.")


def test_releaseworkspaceapi_post_release_already_exists(
def test_releaseworkspaceapi_post_release_already_exists_no_airlock_id(
api_rf, slack_messages, project_membership
):
user = UserFactory(roles=[OutputChecker], username="test-output-checker")
Expand All @@ -841,7 +842,7 @@ def test_releaseworkspaceapi_post_release_already_exists(
"review": {},
}

# Create release via API with same data
# Create release via API with same data. There's no id to check against, so a new release is created
request = api_rf.post(
"/",
data=data,
Expand All @@ -857,8 +858,7 @@ def test_releaseworkspaceapi_post_release_already_exists(
)

assert response.status_code == 201
assert Release.objects.count() == 1
assert Release.objects.latest("id").id == release.id
assert Release.objects.count() == 2


def test_releaseworkspaceapi_post_release_already_exists_with_airlock_id(
Expand All @@ -871,7 +871,7 @@ def test_releaseworkspaceapi_post_release_already_exists_with_airlock_id(
"url": "url",
"size": 7,
"sha256": "hash",
"date": timezone.now(),
"date": datetime(2025, 1, 15, 10, 30, 35, 99999, tzinfo=UTC),
"metadata": {},
"review": None,
}
Expand Down Expand Up @@ -923,13 +923,15 @@ def test_releaseworkspaceapi_post_release_already_created_by_another_user(
"review": None,
}
]
release = ReleaseFactory(requested_files=files, created_by=user)
release = ReleaseFactory(
requested_files=files, created_by=user, id="01AAA1AAAAAAA1AAAAA11A1AAA"
)
BackendMembershipFactory(backend=release.backend, user=user)
project_membership(project=release.workspace.project, user=user)

data = {
"files": files,
"metadata": {},
"metadata": {"tool": "airlock", "airlock_id": "01AAA1AAAAAAA1AAAAA11A1AAA"},
"review": {},
}

Expand Down
56 changes: 39 additions & 17 deletions tests/unit/jobserver/test_releases.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
from rest_framework.exceptions import NotFound

from jobserver import releases
from jobserver.models import ReleaseFile
from jobserver.models import Release, ReleaseFile
from jobserver.models.release_file import absolute_file_path
from tests.factories import (
BackendFactory,
ReleaseFactory,
ReleaseFileFactory,
UserFactory,
WorkspaceFactory,
Expand Down Expand Up @@ -83,9 +84,9 @@ def test_build_outputs_zip_with_missing_files(build_release_with_files):
assert "This file was redacted by" in zipped_contents, zipped_contents


def test_create_release_reupload_allowed():
def test_create_release_already_exists(build_release):
workspace = WorkspaceFactory(name="workspace")
rfile = ReleaseFileFactory(workspace=workspace, name="file1.txt", filehash="hash")
backend = BackendFactory()

files = [
{
Expand All @@ -99,13 +100,21 @@ def test_create_release_reupload_allowed():
}
]

existing_release = ReleaseFactory(
requested_files=files, workspace=workspace, backend=backend
)
ReleaseFileFactory(release=existing_release, name="file1.txt")
assert Release.objects.count() == 1

release = releases.create_release(
workspace,
rfile.release.backend,
rfile.release.created_by,
backend,
existing_release.created_by,
files,
id=existing_release.id,
)

assert release.id == existing_release.id
assert release.requested_files == files
assert release.files.count() == 1

Expand All @@ -114,9 +123,9 @@ def test_create_release_reupload_allowed():
rfile.size == 4


def test_create_release_already_exists():
def test_create_release_already_exists_file_mismatch():
workspace = WorkspaceFactory(name="workspace")
rfile = ReleaseFileFactory(workspace=workspace, name="file1.txt", filehash="hash")
backend = BackendFactory()

files = [
{
Expand All @@ -130,19 +139,32 @@ def test_create_release_already_exists():
}
]

release = releases.create_release(
workspace,
rfile.release.backend,
rfile.release.created_by,
files,
existing_release = ReleaseFactory(
requested_files=files, workspace=workspace, backend=backend
)
ReleaseFileFactory(release=existing_release, name="file1.txt")
assert Release.objects.count() == 1

assert release.requested_files == files
assert release.files.count() == 1
mismatched_files = [
{
"name": "file2.txt",
"path": "path/to/file1.txt",
"url": "",
"size": 5,
"sha256": "hash1",
"date": "2022-08-17T13:37Z",
"metadata": {},
}
]

rfile = release.files.first()
rfile.filehash == "hash"
rfile.size == 4
with pytest.raises(releases.ReleaseFilesMismatch):
releases.create_release(
workspace,
backend,
existing_release.created_by,
mismatched_files,
id=existing_release.id,
)


def test_create_release_success():
Expand Down

0 comments on commit df70498

Please sign in to comment.