Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pandas support #53

Merged
merged 12 commits into from
Jan 9, 2024
145 changes: 142 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ repository = "https://github.com/biomarkersParkinson/tsdf"
[tool.poetry.dependencies]
python = "^3.9"
numpy = "^1.24.1"
pandas = "^2.1.3"

[tool.poetry.group.dev.dependencies]
pytest = "^7.2.0"
Expand Down
4 changes: 4 additions & 0 deletions src/tsdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@

from .write_binary import (
write_binary_file,
write_dataframe_to_binaries,
)
from .read_binary import (
load_binary_from_metadata,
load_binaries_to_dataframe,
)

from .tsdfmetadata import TSDFMetadata
Expand All @@ -31,7 +33,9 @@
'load_metadata_legacy_file',
'write_metadata',
'write_binary_file',
'write_dataframe_to_binaries',
'load_binary_from_metadata',
'load_binaries_to_dataframe',
'TSDFMetadata',
'constants'
]
20 changes: 20 additions & 0 deletions src/tsdf/read_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,30 @@

import os
kretep marked this conversation as resolved.
Show resolved Hide resolved
import numpy as np
import pandas as pd
from tsdf import numpy_utils
from tsdf import tsdfmetadata


def load_binaries_to_dataframe(metadatas: 'list[tsdfmetadata.TSDFMetadata]') -> pd.DataFrame:
    """
    Load the binary files described by the given metadata objects and combine
    them column-wise into a single pandas DataFrame.

    Each binary file is loaded into its own DataFrame whose columns are named
    after ``metadata.channels``; the frames are then concatenated along
    axis 1, in the order the metadata objects were supplied.

    :param metadatas: list of TSDFMetadata objects, one per binary file.

    :return: pandas DataFrame containing the combined data; an empty
        DataFrame when no metadata objects are supplied.
    """
    # Load the data: one frame per binary file
    data_frames = [
        pd.DataFrame(load_binary_from_metadata(metadata), columns=metadata.channels)
        for metadata in metadatas
    ]

    # pd.concat raises ValueError when given nothing to concatenate
    if not data_frames:
        return pd.DataFrame()

    # Merge the data
    return pd.concat(data_frames, axis=1)
vedran-kasalica marked this conversation as resolved.
Show resolved Hide resolved


def load_binary_from_metadata(
metadata: 'tsdfmetadata.TSDFMetadata', start_row: int = 0, end_row: int = -1
) -> np.ndarray:
Expand Down
4 changes: 2 additions & 2 deletions src/tsdf/tsdfmetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class TSDFMetadata:

def __init__(
self, dictionary: Dict[str, Any], dir_path: str, file_name: str = ""
kretep marked this conversation as resolved.
Show resolved Hide resolved
) -> None:
) -> None: #TODO: why not get file_name from dictionary? Which one is used; is this clear?
kretep marked this conversation as resolved.
Show resolved Hide resolved
"""
The default constructor takes a dictionary as an argument and creates each
field as a separate property.\\
Expand All @@ -60,7 +60,7 @@ def __init__(
:param dir_path: path to the directory where the metadata file is stored.
:param file_name: (optional) name of the metadata file.
"""
parse_metadata.contains_tsdf_mandatory_fields(dictionary)
parse_metadata.contains_tsdf_mandatory_fields(dictionary) #TODO: how to load a dict that is not complete yet?
vedran-kasalica marked this conversation as resolved.
Show resolved Hide resolved
for key, value in dictionary.items():
setattr(self, key, value)
self.file_dir_path = dir_path
Expand Down
28 changes: 28 additions & 0 deletions src/tsdf/write_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,38 @@
import os
from typing import Any, Dict
import numpy as np
import pandas as pd
from tsdf import numpy_utils

from tsdf.tsdfmetadata import TSDFMetadata


def write_dataframe_to_binaries(
    file_dir: str, df: pd.DataFrame, metadatas: "list[TSDFMetadata]"
) -> None:
    """
    Write columns of a pandas DataFrame to one binary file per metadata object.

    For every metadata object, the columns listed in ``metadata.channels`` are
    selected from the DataFrame, converted to a numpy array and written to
    ``metadata.file_name`` inside ``file_dir``. The metadata object is then
    updated in place with the properties derived from the written array.

    :param file_dir: path to the directory where the files will be saved.
    :param df: pandas DataFrame containing the data.
    :param metadatas: list of TSDFMetadata objects; each one supplies the
        ``file_name`` and ``channels`` of a binary file to write.

    :return: None; the metadata objects are modified in place.
    """
    for metadata in metadatas:
        path = os.path.join(file_dir, metadata.file_name)

        # Write
        data = df[metadata.channels].to_numpy()  # TODO: derive channels from dataframe or use specified in metadata? Also for file_name?
        data.tofile(path)

        # Update metadata with data properties
        for key, value in _get_metadata_from_ndarray(data).items():
            setattr(metadata, key, value)


def _get_metadata_from_ndarray(data: np.ndarray) -> Dict[str, Any]:
"""
Expand Down Expand Up @@ -48,5 +75,6 @@ def write_binary_file(
data.tofile(path)
metadata.update(_get_metadata_from_ndarray(data))
metadata.update({"file_name": file_name})
#TODO: update file_dir_path?
vedran-kasalica marked this conversation as resolved.
Show resolved Hide resolved
vedran-kasalica marked this conversation as resolved.
Show resolved Hide resolved

return TSDFMetadata(metadata, file_dir)
6 changes: 6 additions & 0 deletions tests/test_read_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,9 @@ def test_random_access(shared_datadir):
data = tsdf.load_binary_from_metadata(metadata[name + ".bin"], 2, 6)
assert(data.shape == (4, 3))
assert(data.dtype == "int16")

def test_load_binary_to_dataframe(shared_datadir):
    """Check that the time and samples binaries load into one combined DataFrame."""
    expected_columns = [
        "time",
        "acceleration_x",
        "acceleration_y",
        "acceleration_z",
        "rotation_x",
        "rotation_y",
        "rotation_z",
    ]

    meta_by_file = tsdf.load_metadata_from_path(shared_datadir / "ppp_format_meta.json")
    selected = [
        meta_by_file["ppp_format_time.bin"],
        meta_by_file["ppp_format_samples.bin"],
    ]
    frame = tsdf.load_binaries_to_dataframe(selected)

    assert frame.shape == (17, 7)
    assert frame.columns.tolist() == expected_columns
Loading
Loading