Skip to content

Commit

Permalink
prevent empty statistics file (#628)
Browse files Browse the repository at this point in the history
* prevent empty statistics file

* Empty For CLA

* using fakefs instead of patching builtins.open/write
  • Loading branch information
mhmdk0 authored Dec 12, 2024
1 parent ea5a576 commit 848bb72
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 4 deletions.
7 changes: 7 additions & 0 deletions cli/medperf/commands/dataset/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,13 @@ def run_statistics(self):
self.dataset.unmark_as_ready()
raise e

with open(self.out_statistics_path) as f:
stats = yaml.safe_load(f)

if stats is None:
self.dataset.unmark_as_ready()
raise ExecutionError("Statistics file is empty.")

self.ui.print("> Statistics complete")

def mark_dataset_as_ready(self):
Expand Down
28 changes: 25 additions & 3 deletions cli/medperf/tests/commands/dataset/test_prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_get_prep_cube_downloads_cube_file(mocker, data_preparation, cube):

@pytest.mark.parametrize("dataset_for_test", [False, True])
def test_prepare_with_test_data_doesnt_send_reports(
mocker, data_preparation, dataset_for_test, cube, comms, fs
mocker, data_preparation, dataset_for_test, cube, comms, fs
):
# Arrange
data_preparation.dataset.for_test = dataset_for_test
Expand Down Expand Up @@ -86,7 +86,9 @@ def test_prepare_runs_then_stops_report_handler(
mocker.patch.object(data_preparation.dataset, "write")
mocked_obs = mocker.create_autospec(spec=Observer)
mocker.patch(PATCH_REGISTER.format("Observer"), side_effect=mocked_obs)
gen_report_spy = mocker.patch(PATCH_REGISTER.format("DataPreparation._DataPreparation__generate_report_dict"))
gen_report_spy = mocker.patch(
PATCH_REGISTER.format("DataPreparation._DataPreparation__generate_report_dict")
)

# Act
data_preparation.run_prepare()
Expand Down Expand Up @@ -208,11 +210,14 @@ def _failure_run(*args, **kwargs):

@pytest.mark.parametrize("metadata_specified", [False, True])
def test_statistics_checks_metadata_path(
mocker, data_preparation, metadata_specified, cube
mocker, data_preparation, metadata_specified, cube, fs
):
# Arrange
spy = mocker.patch.object(cube, "run")
data_preparation.metadata_specified = metadata_specified
data_preparation.out_statistics_path = "test.yaml"
fs.create_file(data_preparation.out_statistics_path, contents="")
mocker.patch("yaml.safe_load", return_value={})

# Act
data_preparation.run_statistics()
Expand All @@ -224,6 +229,23 @@ def test_statistics_checks_metadata_path(
assert "metadata_path" not in spy.call_args.kwargs.keys()


def test_preparation_fails_if_statistics_is_none(mocker, data_preparation, cube, fs):

# Arrange
unmark_spy = mocker.patch.object(data_preparation.dataset, "unmark_as_ready")
mocker.patch.object(cube, "run")
data_preparation.out_statistics_path = "test.yaml"
fs.create_file(data_preparation.out_statistics_path, contents="")
mocker.patch("yaml.safe_load", return_value=None)

# Act
with pytest.raises(ExecutionError):
data_preparation.run_statistics()

# Assert
unmark_spy.assert_called_once()


def test_dataset_is_updated_after_report_sending(mocker, data_preparation, comms):
# Arrange
send_spy = mocker.patch.object(comms, "update_dataset")
Expand Down
18 changes: 18 additions & 0 deletions server/dataset/migrations/0005_alter_dataset_generated_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.11 on 2024-11-29 16:23

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("dataset", "0004_auto_20231211_1827"),
]

operations = [
migrations.AlterField(
model_name="dataset",
name="generated_metadata",
field=models.JSONField(blank=True, default=dict),
),
]
2 changes: 1 addition & 1 deletion server/dataset/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class Dataset(models.Model):
state = models.CharField(
choices=DATASET_STATE, max_length=100, default="DEVELOPMENT"
)
generated_metadata = models.JSONField(default=dict, blank=True, null=True)
generated_metadata = models.JSONField(default=dict, blank=True)
user_metadata = models.JSONField(default=dict, blank=True, null=True)
report = models.JSONField(default=dict, blank=True, null=True)
created_at = models.DateTimeField(auto_now_add=True)
Expand Down

0 comments on commit 848bb72

Please sign in to comment.