Skip to content

Commit

Permalink
Empty archive deps entry for PARQUET tables
Browse files: browse the repository at this point in the history
  • Loading branch information
hagenw committed Jun 21, 2024
1 parent 86d2c37 commit d0279f7
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 8 deletions.
6 changes: 4 additions & 2 deletions audb/core/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,19 +482,21 @@ def _add_meta(
self,
file: str,
version: str,
archive: str,
checksum: str,
):
r"""Add or update table file.
Args:
file: relative file path
archive: archive name without extension
checksum: checksum of file
version: version string
"""
format = audeer.file_extension(file).lower()
if format == "parquet":
archive = ""
else:
archive = os.path.splitext(file[3:])[0]

self._df.loc[file] = [
archive, # archive
Expand Down
2 changes: 1 addition & 1 deletion audb/core/publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def _find_tables(
):
checksum = utils.md5(os.path.join(db_root, file))
if file not in deps or checksum != deps.checksum(file):
deps._add_meta(file, version, table, checksum)
deps._add_meta(file, version, checksum)
tables.append(table)

return tables
Expand Down
11 changes: 6 additions & 5 deletions tests/test_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,16 +532,17 @@ def test_add_media(deps, values):


@pytest.mark.parametrize(
"file, version, archive, checksum",
"file, version, checksum, expected_archive",
[
("db.table1.csv", "2.1.0", "table1", "asddfnfpork45rgfl"),
("db.table1.csv", "2.1.0", "asddfnfpork45rgfl", "table1"),
("db.table1.parquet", "2.1.0", "asddfnfpork45rgfl", ""),
],
)
def test_add_meta(deps, file, version, archive, checksum):
deps._add_meta(file, version, archive, checksum)
def test_add_meta(deps, file, version, checksum, expected_archive):
deps._add_meta(file, version, checksum)
assert len(deps) == len(ROWS) + 1
assert deps.version(file) == version
assert deps.archive(file) == archive
assert deps.archive(file) == expected_archive
assert deps.checksum(file) == checksum


Expand Down
4 changes: 4 additions & 0 deletions tests/test_publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -1162,6 +1162,10 @@ def expected_table_checksum(path: str) -> str:
assert deps.checksum(table_file) == expected_table_checksum(
audeer.path(build_dir, table_file)
)
if storage_format == "csv":
assert deps.archive(table_file) == table
elif storage_format == "parquet":
assert deps.archive(table_file) == ""

# Load database to cache
db = audb.load(name, version=version, verbose=False, full_path=False)
Expand Down

0 comments on commit d0279f7

Please sign in to comment.