Skip to content

Commit

Permalink
suggested changes
Browse files Browse the repository at this point in the history
  • Loading branch information
peterc-yuma committed Dec 19, 2024
1 parent 0ec8642 commit 8e93c72
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 67 deletions.
4 changes: 2 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ readme = "README.md"
# ~1.2.3 means >=1.2.3 and <1.3.0

# Python version - 3.9 (3.9.2 or newer; 3.9.0 and 3.9.1 are excluded), 3.10, 3.11
python = ">= 3.9, < 3.12"
python = ">3.9.1,<3.12"

# Bittensor Version Strict
bittensor = "7.4.0"
Expand All @@ -35,6 +35,7 @@ pandas-market-calendars = "^4.4.2"
python-dotenv = "^1.0.1"
scikit-learn = "^1.6.0"
wandb = "^0.19.1"
cryptography = ">=42.0.5,<42.1.0"

[tool.poetry.group.dev.dependencies]
pre-commit-hooks = "5.0.0"
Expand Down
53 changes: 6 additions & 47 deletions snp_oracle/predictionnet/utils/dataset_manager.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,3 @@
# The MIT License (MIT)
# Copyright © 2024 Foundry Digital

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

import asyncio
import json
import os
Expand All @@ -27,8 +10,6 @@

import bittensor as bt
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from cryptography.fernet import Fernet
from dotenv import load_dotenv
from git import Repo
Expand Down Expand Up @@ -132,13 +113,9 @@ def store_local_data(
if metadata:
full_metadata.update(metadata)

# Convert to PyArrow Table with metadata
table = pa.Table.from_pandas(miner_data)
for key, value in full_metadata.items():
table = table.replace_schema_metadata({**table.schema.metadata, key.encode(): str(value).encode()})

# Write Parquet file with compression
pq.write_table(table, file_path, compression="snappy", use_dictionary=True, use_byte_stream_split=True)
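# Attach metadata through DataFrame.attrs and save to Parquet; the reader recovers it from df.attrs.
# NOTE(review): assumes the installed pandas/pyarrow persist attrs in the Parquet file and that every value is JSON-serializable - TODO confirm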
miner_data.attrs.update(full_metadata)
miner_data.to_parquet(file_path, engine="pyarrow", compression="snappy", index=False)

return True, {
"local_path": str(file_path),
Expand Down Expand Up @@ -282,27 +259,9 @@ def decrypt_data(self, data_path: str, decryption_key: bytes) -> Tuple[bool, Dic
temp_file.write(decrypted_data)
temp_file.flush()

# Read Parquet file
table = pq.read_table(temp_file.name)
df = table.to_pandas()

# Extract metadata from Parquet schema
metadata = {}
predictions = None

if table.schema.metadata:
for key, value in table.schema.metadata.items():
try:
key_str = key.decode() if isinstance(key, bytes) else key
value_str = value.decode() if isinstance(value, bytes) else value

if key_str == "predictions":
predictions = json.loads(value_str)
else:
metadata[key_str] = value_str
except Exception as e:
bt.logging.error(f"Error while extracting metadata: {str(e)}")
continue
df = pd.read_parquet(temp_file.name)
metadata = df.attrs.copy()
predictions = json.loads(metadata.pop("predictions", "null"))

return True, {"data": df, "metadata": metadata, "predictions": predictions}

Expand Down
24 changes: 7 additions & 17 deletions snp_oracle/predictionnet/utils/miner_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

import bittensor as bt
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from cryptography.fernet import Fernet
from dotenv import load_dotenv
from huggingface_hub import HfApi
Expand Down Expand Up @@ -92,7 +90,7 @@ def upload_model(self, repo_id=None, model_path=None, hotkey=None):
bt.logging.debug(f"Error in upload_model: {str(e)}")
return False, {"error": str(e)}

def upload_data(self, repo_id=None, data: pd.DataFrame = None, hotkey=None, encryption_key=None):
def upload_data(self, repo_id, data: pd.DataFrame, hotkey=None, encryption_key=None):
"""
Upload encrypted training/validation data to HuggingFace Hub using Parquet format.
Expand Down Expand Up @@ -145,22 +143,14 @@ def upload_data(self, repo_id=None, data: pd.DataFrame = None, hotkey=None, encr
temp_encrypted = os.path.join(temp_dir, data_filename)

try:
# Convert to PyArrow Table with metadata
table = pa.Table.from_pandas(data)
table = table.replace_schema_metadata(
{
**table.schema.metadata,
b"timestamp": timestamp.encode(),
b"hotkey": hotkey.encode() if hotkey else b"",
}
)
# Add metadata to the DataFrame
data.attrs["timestamp"] = timestamp
data.attrs["hotkey"] = hotkey if hotkey else ""

# Write Parquet file with compression
pq.write_table(
table, temp_parquet, compression="snappy", use_dictionary=True, use_byte_stream_split=True
)
# Write to parquet with compression
data.to_parquet(temp_parquet, compression="snappy", engine="pyarrow")

# Read and encrypt the Parquet file
# Read and encrypt the temporary Parquet file
with open(temp_parquet, "rb") as f:
parquet_data = f.read()
encrypted_data = fernet.encrypt(parquet_data)
Expand Down

0 comments on commit 8e93c72

Please sign in to comment.