diff --git a/pdr_backend/accuracy/app.py b/pdr_backend/accuracy/app.py
index 9a91d51ac..0299ae506 100644
--- a/pdr_backend/accuracy/app.py
+++ b/pdr_backend/accuracy/app.py
@@ -1,12 +1,12 @@
 import logging
 import threading
+import json
 from datetime import datetime, timedelta
 from typing import Any, Callable, Dict, List, Optional, Tuple
 
 from enforce_typing import enforce_types
 from flask import Flask, jsonify
 
-from pdr_backend.lake.etl import ETL
 from pdr_backend.lake.gql_data_factory import GQLDataFactory
 from pdr_backend.lake.persistent_data_store import PersistentDataStore
 from pdr_backend.lake.slot import Slot
@@ -23,6 +23,14 @@ SECONDS_IN_A_DAY = 86400
 
 logger = logging.getLogger("accuracy_app")
 
+# Use the same ppss.yaml file that the cli run command uses
+
+accuracy_ppss = PPSS(
+    yaml_filename="./ppss.yaml",
+    network="sapphire-mainnet",
+    nested_override_args=None,
+)
+
 
 @enforce_types
 def calculate_prediction_result(
@@ -248,18 +256,57 @@ def calculate_timeframe_timestamps(
 
 
 @enforce_types
-def fetch_statistics_using_ETL():
-    # return
-    ppss = PPSS(
-        yaml_filename="./ppss.yaml",
-        network="sapphire-mainnet",
-        nested_override_args=None,
+def calculate_statistics_from_DuckDB_tables():
+    four_weeks_ago = datetime.utcnow() - timedelta(weeks=4)
+    start_ts = UnixTimeS(int(four_weeks_ago.timestamp()))
+
+    db_conn = PersistentDataStore(accuracy_ppss.lake_ss.lake_dir)
+    slots_table_name = Slot.get_lake_table_name()
+
+    slots_table = db_conn.query_data(
+        f"""
+        SELECT * FROM {slots_table_name} WHERE SLOT > {start_ts}
+        """
+    )
+    print(
+        "THE_QUERY:",
+        f"""
+        SELECT * FROM {slots_table_name} WHERE SLOT > {start_ts}
+        """,
     )
+    db_conn.duckdb_conn.close()
+
+    slots_table = slots_table.group_by("ID").first()
+    slots_table = slots_table.sort("slot")
+
+    all_slots: List[Slot] = []
+
+    # Iterate over rows and create Slot objects
+    for row in slots_table.rows(named=True):
+        slot = Slot(
+            row["ID"],
+            row["timestamp"],
+            row["slot"],
+            row["truevalue"],
+            row["roundSumStakesUp"],
+            row["roundSumStakes"],
+        )
+        all_slots.append(slot)
 
-    gql_data_factory = GQLDataFactory(ppss)
-    etl = ETL(ppss, gql_data_factory)
+    data = transform_slots_to_statistics(all_slots)
+    json_data = json.dumps(data)
 
+    # write the statistics to the JSON file served by /statistics
+    with open(JSON_FILE_PATH, "w") as f:
+        f.write(json_data)
+
+
+@enforce_types
+def fetch_statistics_using_ETL():
+    gql_data_factory = GQLDataFactory(accuracy_ppss)
 
     while True:
-        etl.do_etl()
+        gql_data_factory.get_gql_tables()
+        calculate_statistics_from_DuckDB_tables()
         threading.Event().wait(300)  # Wait for 5 minutes (300 seconds)
 
 
@@ -339,7 +386,7 @@ def transform_slots_to_statistics(all_slots: List[Slot]):
 
 @enforce_types
 @app.route("/statistics", methods=["GET"])
-def calculate_statistics_from_DuckDB_tables():
+def serve_statistics():
     """
     Serves statistical data from a JSON file via a GET request.
 
@@ -349,38 +396,10 @@
     When accessed, it returns the contents of the JSON file if it exists,
     or a 404 error with a message if the file is not found.
     If the file cannot be read or another error occurs, it returns a 500 Internal Server Error.
""" - - four_weeks_ago = datetime.utcnow() - timedelta(weeks=4) - start_ts = UnixTimeS(int(four_weeks_ago.timestamp())) - end_ts = UnixTimeS(int(datetime.utcnow().timestamp())) try: - db_conn = PersistentDataStore("./lake_data", read_only=True) - slots_table_name = Slot.get_lake_table_name() - slots_table = db_conn.query_data( - f""" - SELECT * FROM {slots_table_name} WHERE SLOT > {start_ts} AND SLOT < {end_ts} - """ - ) - db_conn.duckdb_conn.close() - - slots_table = slots_table.group_by("ID").first() - slots_table = slots_table.sort("slot") - - all_slots: List[Slot] = [] - - # Iterate over rows and create objects - for row in slots_table.rows(named=True): - slot = Slot( - row["ID"], - row["timestamp"], - row["slot"], - row["truevalue"], - row["roundSumStakesUp"], - row["roundSumStakes"], - ) - all_slots.append(slot) - - data = transform_slots_to_statistics(all_slots) + with open(JSON_FILE_PATH, "r") as f: + data = json.load(f) # Load JSON data from file + response = jsonify(data) response.headers.add("Access-Control-Allow-Origin", "*") # Allow any origin return response diff --git a/pdr_backend/accuracy/readme.md b/pdr_backend/accuracy/readme.md new file mode 100644 index 000000000..a858482c8 --- /dev/null +++ b/pdr_backend/accuracy/readme.md @@ -0,0 +1,80 @@ +# pdr_backend/accuracy + +This document provides instructions on how to deploy and run the `app.py` script in the `pdr_backend/accuracy` directory. + +## Requirements + +- Python 3.x +- Required Python packages (listed in `requirements.txt`) + +## Deployment + +```bash +git clone +cd pdr_backend + +pip install -r requirements.txt +``` + +## Usage + +The `app.py` script is used to calculate the accuracy of a model based on the predicted and true values. + +```bash +python pdr_backend/accuracy/app.py +``` + + +The script uses the `GQLDataFactory` to fetch the predictions. Be sure to provide the correct values for `st_timestr` in the `ppss.yaml` file to get the correct predictions. It needs to be at least `28 days ago` from the current date. + +The script will output the accuracy of the predictoor based on the provided values to a file named `pdr_backend/accuracy/output/accuracy.json`. + +The data includes the pair name, the average accuracy of the model, the total staked amount yesterday, and the total staked amount today. + +Example Output: +```json + +[ + { + "alias": "5m", + "statistics": { + "0xb1c55346023dee4d8b0d7b10049f0c8854823766": { + "token_name": "LTC/USDT", + "average_accuracy": 53.67847411444142, + "total_staked_yesterday": 217456.43999999997, + "total_staked_today": 203650.03999999992 + }, + ........ + } + }, + { + "alias": "1h", + "statistics": { + "0xb1c55346023dee4d8b0d7b10049f0c8854823766": { + "token_name": "LTC/USDT", + "average_accuracy": 53.67847411444142, + "total_staked_yesterday": 217456.43999999997, + "total_staked_today": 203650.03999999992 + }, + ........ + + } +] +``` + +## Flask API + +The `app.py` script provides a Flask endpoint with `/statistics` route to get the accuracy of the model. + +```bash +curl http://localhost:5000/statistics +``` + +The endpoint will return the accuracy of the model in JSON format. + + +## Warning about multithreading + +The `app.py` script uses multithreading to handle multiple tasks simultaneously. Please ensure that the script is run in a thread-safe environment to avoid any issues. + +The main thread will be used to handle the Flask API requests, while the worker threads will be used to calculate the accuracy of the model. 
diff --git a/pdr_backend/accuracy/test/test_app.py b/pdr_backend/accuracy/test/test_app.py
index 18abb35cc..6d9769300 100644
--- a/pdr_backend/accuracy/test/test_app.py
+++ b/pdr_backend/accuracy/test/test_app.py
@@ -1,5 +1,8 @@
 from typing import List
+from datetime import datetime, timedelta
+from unittest.mock import patch
+import polars as pl
 
 from enforce_typing import enforce_types
 
 from pdr_backend.subgraph.subgraph_predictions import ContractIdAndSPE
@@ -8,9 +11,12 @@
     process_single_slot,
     aggregate_statistics,
     calculate_statistics_for_all_assets,
+    calculate_statistics_from_DuckDB_tables,
 )
 from pdr_backend.lake.slot import Slot
 from pdr_backend.util.time_types import UnixTimeS
+from pdr_backend.lake.persistent_data_store import PersistentDataStore
+from pdr_backend.ppss.ppss import mock_ppss
 
 # Sample data for tests
 SAMPLE_PREDICT_SLOT = Slot(
@@ -87,3 +93,91 @@ def test_calculate_statistics_for_all_assets():
     print("test_calculate_statistics_for_all_assets", statistics)
     # Verify
     assert statistics["0xAsset"]["average_accuracy"] == 100.0
+
+
+@enforce_types
+def test_calculate_statistics_from_DuckDB_tables(tmpdir):
+    ppss = mock_ppss(
+        [{"predict": "binance BTC/USDT c 5m", "train_on": "binance BTC/USDT c 5m"}],
+        "sapphire-mainnet",
+        str(tmpdir),
+        st_timestr="2023-12-20",
+        fin_timestr="now",
+    )
+    # drop any existing slots table so the test starts from a clean lake
+    PersistentDataStore(ppss.lake_ss.lake_dir).execute_sql(
+        "DROP TABLE IF EXISTS slots;"
+    )
+
+    two_weeks_ago = datetime.utcnow() - timedelta(weeks=2)
+    slot_timestamp = UnixTimeS(int(two_weeks_ago.timestamp()))
+
+    # Generate 100 slot timestamps with 5 minute intervals
+    slot_timestamps = [slot_timestamp + i * 300 for i in range(100)]
+
+    # generate IDs of the form 0x18f54cc21b7a2fdd011bea06bba7801b280e3151-<slot_timestamp>
+    generated_ids = [
+        f"0x18f54cc21b7a2fdd011bea06bba7801b280e3151-{slot}" for slot in slot_timestamps
+    ]
+    # slots dataframe
+    slots_df = pl.DataFrame(
+        {
+            "ID": generated_ids,
+            "timestamp": slot_timestamps,
+            "slot": slot_timestamps,
+            "truevalue": [True] * 100,
+            "roundSumStakesUp": [150.0] * 100,
+            "roundSumStakes": [100.0] * 100,
+        }
+    )
+
+    # Main work: insert the slots into the lake's pdr_slots table
+    PersistentDataStore(ppss.lake_ss.lake_dir).insert_to_table(slots_df, "pdr_slots")
+
+    test_json_file_path = str(tmpdir.join("test.json"))
+    with patch("pdr_backend.accuracy.app.JSON_FILE_PATH", test_json_file_path):
+        with patch("pdr_backend.accuracy.app.accuracy_ppss", ppss):
+            calculate_statistics_from_DuckDB_tables()
+
+    # Verify
+    expected_result = """[{"alias": "5m", "statistics": {"0x18f54cc21b7a2fdd011bea06bba7801b280e3151": {"token_name": "ADA/USDT", "average_accuracy": 100.0, "total_staked_yesterday": 0.0, "total_staked_today": 0.0}}}, {"alias": "1h", "statistics": {}}]"""  # pylint: disable=line-too-long
+
+    with open(test_json_file_path, "r") as f:
+        result = f.read()
+        assert result == expected_result
+
+    # Test with false values
+    false_start_timestamp = slot_timestamps[-1] + 300
+
+    # Generate 100 more slot timestamps with 5 minute intervals
+    false_slot_timestamps = [false_start_timestamp + i * 300 for i in range(100)]
+
+    # generate IDs of the form 0x18f54cc21b7a2fdd011bea06bba7801b280e3151-<slot_timestamp>
+    generated_ids = [
+        f"0x18f54cc21b7a2fdd011bea06bba7801b280e3151-{slot}"
+        for slot in false_slot_timestamps
+    ]
+    # slots dataframe (use the new timestamps so the rows line up with their IDs)
+    false_slots = pl.DataFrame(
+        {
+            "ID": generated_ids,
+            "timestamp": false_slot_timestamps,
+            "slot": false_slot_timestamps,
+            "truevalue": [False] * 100,
+            "roundSumStakesUp": [150.0] * 100,
+            "roundSumStakes": [100.0] * 100,
+        }
+    )
+
+    PersistentDataStore(ppss.lake_ss.lake_dir).insert_to_table(false_slots, "pdr_slots")
+
+    test_json_file_path = str(tmpdir.join("test.json"))
+    with patch("pdr_backend.accuracy.app.JSON_FILE_PATH", test_json_file_path):
+        with patch("pdr_backend.accuracy.app.accuracy_ppss", ppss):
+            calculate_statistics_from_DuckDB_tables()
+
+    expected_result = """[{"alias": "5m", "statistics": {"0x18f54cc21b7a2fdd011bea06bba7801b280e3151": {"token_name": "ADA/USDT", "average_accuracy": 50.0, "total_staked_yesterday": 0.0, "total_staked_today": 0.0}}}, {"alias": "1h", "statistics": {}}]"""  # pylint: disable=line-too-long
+
+    with open(test_json_file_path, "r") as f:
+        result = f.read()
+        assert result == expected_result