Skip to content

Commit

Permalink
Merge pull request #92 from UCSD-E4E/37-zip-command
Browse files Browse the repository at this point in the history
37 zip command
  • Loading branch information
ntlhui authored Jan 11, 2025
2 parents e5b4340 + 1b573b5 commit 24b24d5
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 20 deletions.
29 changes: 9 additions & 20 deletions e4e_data_management/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@
from __future__ import annotations

import datetime as dt
import fnmatch
import logging
import pickle
import re
from pathlib import Path
from shutil import copy2, rmtree
from typing import Dict, Iterable, List, Optional, Set
Expand Down Expand Up @@ -282,24 +280,7 @@ def push(self, path: Path) -> None:
Args:
path (Path): Destination to push completed dataset to
"""
if any(len(mission.staged_files) != 0
for mission in self.active_dataset.missions.values()) or \
len(self.active_dataset.staged_files) != 0:
raise RuntimeError('Files still in staging')

# Check that the README is present
readmes = [file
for file in list(self.active_dataset.root.glob('*'))
if re.match(fnmatch.translate('readme.*'), file.name, re.IGNORECASE)]

if len(readmes) == 0:
raise RuntimeError('Readme not found')
acceptable_exts = ['.md', '.docx']
if not any(readme.suffix.lower() in acceptable_exts for readme in readmes):
raise RuntimeError('Illegal README format')

# validate self
self.active_dataset.validate()
self.active_dataset.check_complete()

# Duplicate to destination
destination = path.joinpath(self.active_dataset.name)
Expand All @@ -317,6 +298,14 @@ def zip(self, output_path: Path) -> None:
Args:
output_path (Path): Output path
"""
if output_path.suffix.lower() != '.zip':
output_path = output_path.joinpath(
self.active_dataset.name + '.zip')

output_path.parent.mkdir(parents=True, exist_ok=True)
self.active_dataset.check_complete()

self.active_dataset.create_zip(output_path)

def unzip(self, input_file: Path, output_path: Path) -> None:
"""This will unzip the archived dataset to the specified root
Expand Down
52 changes: 52 additions & 0 deletions e4e_data_management/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,23 @@
from __future__ import annotations

import datetime as dt
import fnmatch
import json
import logging
import pickle
import re
import zipfile
from dataclasses import dataclass
from hashlib import sha256
from pathlib import Path
from shutil import copy2
from typing import (Callable, Dict, Generator, Iterable, List, Optional, Set,
Union)

from e4e_data_management.exception import (CorruptedDataset,
MissionFilesInStaging,
ReadmeFilesInStaging,
ReadmeNotFound)
from e4e_data_management.metadata import Metadata


Expand Down Expand Up @@ -515,3 +522,48 @@ def commit(self) -> List[Path]:
committed_files.extend(new_files)
self.staged_files = []
return committed_files

def create_zip(self, zip_path: Path) -> None:
    """Writes every file listed in this Dataset's manifest into a new .zip

    Each archive member is stored under a top-level folder named after the
    dataset, followed by the file's manifest key.

    Args:
        zip_path (Path): Destination of the archive; its suffix must be .zip
            (compared case-insensitively)

    Raises:
        RuntimeError: zip_path does not have a .zip suffix
    """
    suffix_ok = zip_path.suffix.lower() == '.zip'
    if not suffix_ok:
        raise RuntimeError('Invalid suffix')

    with zipfile.ZipFile(file=zip_path, mode='w') as archive:
        # Manifest keys are joined onto the dataset root to locate the
        # source file and onto the dataset name to form the archive name
        for relative_name in self.manifest.get_dict():
            archive.write(
                filename=self.root.joinpath(relative_name),
                arcname=Path(self.name, relative_name),
            )

def check_complete(self) -> None:
    """Checks if the dataset is complete

    The dataset is complete when no mission or dataset-level files remain in
    staging, a readme file with an acceptable extension exists directly under
    the dataset root, and self.validate() succeeds.

    Raises:
        MissionFilesInStaging: Mission files remain in staging
        ReadmeFilesInStaging: Readme files remain in staging
        ReadmeNotFound: No readme file found, or none of the readme files has
            an acceptable extension
        CorruptedDataset: self.validate() reported a failure
    """
    if any(mission.staged_files for mission in self.missions.values()):
        raise MissionFilesInStaging
    if self.staged_files:
        raise ReadmeFilesInStaging

    # Compile the case-insensitive 'readme.*' pattern once instead of
    # re-translating it for every directory entry
    readme_pattern = re.compile(fnmatch.translate('readme.*'), re.IGNORECASE)
    # glob('*') also yields directories; only regular files count as readmes
    readmes = [entry for entry in self.root.glob('*')
               if readme_pattern.match(entry.name) and entry.is_file()]
    if not readmes:
        raise ReadmeNotFound

    acceptable_exts = ('.md', '.docx')
    if not any(readme.suffix.lower() in acceptable_exts for readme in readmes):
        raise ReadmeNotFound('Acceptable extension not found')

    if not self.validate():
        raise CorruptedDataset
28 changes: 28 additions & 0 deletions e4e_data_management/exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
'''E4E Data Management Exceptions
'''
from abc import ABC


class Incomplete(Exception, ABC):
    """Base error signalling that a dataset is not complete
    """


class MissionFilesInStaging(Incomplete):
    """Raised when mission files are still in the staging area
    """


class ReadmeFilesInStaging(Incomplete):
    """Raised when readme files are still in the staging area
    """


class ReadmeNotFound(Incomplete):
    """Raised when the expected readme files are not found
    """


class CorruptedDataset(Exception):
    """Raised when a dataset is found to be corrupted
    """
47 changes: 47 additions & 0 deletions tests/test_zip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
'''Tests zipping
'''
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Tuple
from unittest.mock import Mock
import zipfile
from e4e_data_management.core import DataManager

SingleMissionFixture = Tuple[Tuple[Mock,
DataManager, Path], Tuple[Path, int, int]]


def test_zip_to_dir(single_mission_data: SingleMissionFixture,
                    test_readme: Path):
    """Tests zipping the active dataset when the output path is a directory

    Args:
        single_mission_data (SingleMissionFixture): Single Mission test fixture
        test_readme (Path): Test Readme
    """
    test_app, _ = single_mission_data
    _, app, _ = test_app

    app.add([test_readme], readme=True)
    app.commit(readme=True)
    dataset = app.active_dataset
    with TemporaryDirectory() as target_dir:
        zip_path = Path(target_dir)
        # The path has no .zip suffix, so zip() is expected to place
        # '<dataset name>.zip' inside it
        app.zip(zip_path)

        final_path = zip_path.joinpath(dataset.name + '.zip')
        assert final_path.is_file()

        manifest = dataset.manifest.get_dict()
        with zipfile.ZipFile(file=final_path, mode='r') as handle:
            assert handle.testzip() is None
            # Every archive member must map back to a manifest entry
            for name in handle.filelist:
                ar_name = Path(name.filename).relative_to(dataset.name)
                assert ar_name.as_posix() in manifest

            handle.extractall(target_dir)

        # Every manifest entry must round-trip through the archive unchanged;
        # a file missing from the archive fails here on read
        extracted_root = zip_path.joinpath(dataset.name)
        for key in manifest:
            extracted_bytes = extracted_root.joinpath(key).read_bytes()
            assert extracted_bytes == dataset.root.joinpath(key).read_bytes()

        # NOTE(review): this glob is relative to the current working
        # directory rather than target_dir, so it may not visit the extracted
        # files at all, and its result is not asserted - confirm against
        # Manifest.validate before relying on it
        dataset.manifest.validate(
            manifest=manifest,
            files=Path(dataset.name).rglob('*')
        )

0 comments on commit 24b24d5

Please sign in to comment.