diff --git a/pdr_backend/aimodel/aimodel_data_factory.py b/pdr_backend/aimodel/aimodel_data_factory.py
index 26cf43597..2913a4476 100644
--- a/pdr_backend/aimodel/aimodel_data_factory.py
+++ b/pdr_backend/aimodel/aimodel_data_factory.py
@@ -57,6 +57,10 @@ class AimodelDataFactory:
     def __init__(self, ss: PredictoorSS):
         self.ss = ss
 
+    @staticmethod
+    def testshift(test_n: int, test_i: int) -> int:
+        return test_n - test_i - 1
+
     def create_xy(
         self,
         mergedohlcv_df: pl.DataFrame,
@@ -156,10 +160,12 @@ def create_xy(
         assert len(x_list) == len(xrecent_list) == len(xcol_list)
         x_df = pd.concat(x_list, keys=xcol_list, axis=1)
         xrecent_df = pd.concat(xrecent_list, keys=xcol_list, axis=1)
+        assert x_df.shape[0] == N_train + 1  # the +1 is for test
 
         # convert x dfs to numpy arrays
         X = x_df.to_numpy()
         xrecent = xrecent_df.to_numpy()[0, :]
+        assert X.shape[0] == N_train + 1  # the +1 is for test
 
         # y is set from yval_{exch_str, signal_str, pair_str}
         hist_col = hist_col_name(predict_feed)
@@ -215,6 +221,12 @@ def _slice(x: list, st: int, fin: int) -> list:
     assert st < 0
     assert fin <= 0
     assert st < fin
+    assert abs(st) <= len(x), f"st is out of bounds. st={st}, len(x)={len(x)}"
+
     if fin == 0:
-        return x[st:]
-    return x[st:fin]
+        slicex = x[st:]
+    else:
+        slicex = x[st:fin]
+
+    assert len(slicex) == fin - st, (len(slicex), fin - st, st, fin)
+    return slicex
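For intuition, a minimal sketch of what the new helpers guarantee (values chosen for illustration; _slice is module-private, imported the same way the new test file below does):

from pdr_backend.aimodel.aimodel_data_factory import AimodelDataFactory, _slice

# testshift maps test iteration i to how far back in history to shift:
# with test_n=100, iterations 0..99 give shifts 99, 98, .., 1, 0
assert AimodelDataFactory.testshift(test_n=100, test_i=0) == 99
assert AimodelDataFactory.testshift(test_n=100, test_i=99) == 0

# _slice takes a tail window via negative indices; the new assert
# guarantees the result always has exactly fin - st entries
x = [1, 2, 3, 4, 5, 6, 7]
assert _slice(x, st=-3, fin=0) == [5, 6, 7]   # len 3 == 0 - (-3)
assert _slice(x, st=-3, fin=-1) == [5, 6]     # len 2 == -1 - (-3)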
diff --git a/pdr_backend/aimodel/test/test_aimodel_data_factory_main.py b/pdr_backend/aimodel/test/test_aimodel_data_factory_main.py
index 9badd9425..1897455b5 100644
--- a/pdr_backend/aimodel/test/test_aimodel_data_factory_main.py
+++ b/pdr_backend/aimodel/test/test_aimodel_data_factory_main.py
@@ -109,12 +109,17 @@ def test_create_xy_notransform__1exchange_1coin_1signal():
     ]
     d = predictoor_ss_test_dict(feedset_list=feedset_list)
     predictoor_ss = PredictoorSS(d)
+    predictoor_ss.aimodel_data_ss.set_max_n_train(7)
+    predictoor_ss.aimodel_data_ss.set_autoregressive_n(3)
+
     predict_feed = predictoor_ss.predict_train_feedsets[0].predict
     train_feeds = predictoor_ss.predict_train_feedsets[0].train_on
     aimodel_data_factory = AimodelDataFactory(predictoor_ss)
     mergedohlcv_df = merge_rawohlcv_dfs(ETHUSDT_RAWOHLCV_DFS)
 
     # =========== have testshift = 0
+    # no. rows of X = len(y) = max_n_train + max_n_test(=1) = 7 + 1 = 8
+    # no. cols of X = autoregressive_n * num_signals = 3 * 1 = 3
     target_X = np.array(
         [
             [11.0, 10.0, 9.0],  # oldest
diff --git a/pdr_backend/aimodel/test/test_slice.py b/pdr_backend/aimodel/test/test_slice.py
new file mode 100644
index 000000000..433755b6e
--- /dev/null
+++ b/pdr_backend/aimodel/test/test_slice.py
@@ -0,0 +1,40 @@
+from enforce_typing import enforce_types
+import pytest
+
+from pdr_backend.aimodel.aimodel_data_factory import _slice
+
+
+@enforce_types
+def test_slice__happy_path():
+    x = [1, 2, 3, 4, 5, 6, 7]
+    assert _slice(x=x, st=-2, fin=0) == [6, 7]
+    assert _slice(x=x, st=-3, fin=-1) == [5, 6]
+    assert _slice(x=x, st=-7, fin=-5) == [1, 2]
+    assert _slice(x=x, st=-7, fin=-6) == [1]
+    assert _slice(x=x, st=-7, fin=0) == x
+    assert _slice(x=[1], st=-1, fin=0) == [1]
+
+
+@enforce_types
+def test_slice__unhappy_path():
+    # need st < 0
+    with pytest.raises(AssertionError):
+        _ = _slice(x=[1, 2, 3], st=0, fin=-2)
+
+    # need fin <= 0
+    with pytest.raises(AssertionError):
+        _ = _slice(x=[1, 2, 3], st=-2, fin=1)
+
+    # need st < fin
+    with pytest.raises(AssertionError):
+        _ = _slice(x=[1, 2, 3], st=-4, fin=-4)
+
+    with pytest.raises(AssertionError):
+        _ = _slice(x=[1, 2, 3], st=-4, fin=-5)
+
+    # st out of bounds
+    with pytest.raises(AssertionError):
+        _slice(x=[1, 2, 3, 4, 5, 6, 7], st=-8, fin=-5)
+
+    with pytest.raises(AssertionError):
+        _slice(x=[], st=-1, fin=0)
diff --git a/pdr_backend/aimodel/test/test_true_vs_pred.py b/pdr_backend/aimodel/test/test_true_vs_pred.py
new file mode 100644
index 000000000..c4af647d6
--- /dev/null
+++ b/pdr_backend/aimodel/test/test_true_vs_pred.py
@@ -0,0 +1,74 @@
+from enforce_typing import enforce_types
+from pytest import approx
+
+from pdr_backend.aimodel.true_vs_pred import PERF_NAMES, TrueVsPred
+
+
+# pylint: disable=too-many-statements
+@enforce_types
+def test_true_vs_pred():
+    d = TrueVsPred()
+    assert d.truevals == []
+    assert d.predprobs == []
+    assert d.predvals == []
+    assert d.n_correct == 0
+    assert d.n_trials == 0
+
+    # true = up, guess = up (correct guess)
+    d.update(trueval=True, predprob=0.6)
+    assert d.truevals == [True]
+    assert d.predprobs == [0.6]
+    assert d.predvals == [True]
+    assert d.n_correct == 1
+    assert d.n_trials == 1
+    assert len(d.accuracy()) == 3
+    assert d.accuracy()[0] == 1.0 / 1.0
+
+    # true = down, guess = down (correct guess)
+    d.update(trueval=False, predprob=0.3)
+    assert d.truevals == [True, False]
+    assert d.predprobs == [0.6, 0.3]
+    assert d.predvals == [True, False]
+    assert d.n_correct == 2
+    assert d.n_trials == 2
+    assert d.accuracy()[0] == 2.0 / 2.0
+
+    # true = up, guess = down (incorrect guess)
+    d.update(trueval=True, predprob=0.4)
+    assert d.truevals == [True, False, True]
+    assert d.predprobs == [0.6, 0.3, 0.4]
+    assert d.predvals == [True, False, False]
+    assert d.n_correct == 2
+    assert d.n_trials == 3
+    assert d.accuracy()[0] == approx(2.0 / 3.0)
+
+    # true = down, guess = up (incorrect guess)
+    d.update(trueval=False, predprob=0.7)
+    assert d.truevals == [True, False, True, False]
+    assert d.predprobs == [0.6, 0.3, 0.4, 0.7]
+    assert d.predvals == [True, False, False, True]
+    assert d.n_correct == 2
+    assert d.n_trials == 4
+    assert d.accuracy()[0] == approx(2.0 / 4.0)
+
+    # test performance values
+    (acc_est, acc_l, acc_u) = d.accuracy()
+    assert acc_est == approx(0.5)
+    assert acc_l == approx(0.010009003864986377)
+    assert acc_u == approx(0.9899909961350136)
+
+    (precision, recall, f1) = d.precision_recall_f1()
+    assert precision == approx(0.5)
+    assert recall == approx(0.5)
+    assert f1 == approx(0.5)
+
+    loss = d.log_loss()
+    assert loss == approx(0.7469410259762035)
+
+    assert d.perf_names() == PERF_NAMES
+    assert len(d.perf_values()) == len(PERF_NAMES)
+
+    target_values = [acc_est, acc_l, acc_u, precision, recall, f1, loss]
+    values = d.perf_values()
+    for val, target_val in zip(values, target_values):
+        assert val == approx(target_val)
diff --git a/pdr_backend/aimodel/true_vs_pred.py b/pdr_backend/aimodel/true_vs_pred.py
new file mode 100644
index 000000000..326a0dd6d
--- /dev/null
+++ b/pdr_backend/aimodel/true_vs_pred.py
@@ -0,0 +1,76 @@
+from typing import List, Tuple
+
+from enforce_typing import enforce_types
+
+from sklearn.metrics import log_loss, precision_recall_fscore_support
+from statsmodels.stats.proportion import proportion_confint
+
+PERF_NAMES = ["acc_est", "acc_l", "acc_u", "precision", "recall", "f1", "loss"]
+
+LOG_LOSS_ON_CONSTANT = 0.5
+
+
+class TrueVsPred:
+    """
+    True vs pred vals for a single aimodel, or for a history of models,
+    + the performances that derive from true vs pred value info
+    """
+
+    @enforce_types
+    def __init__(self):
+        # 'i' is iteration number i
+        self.truevals: List[bool] = []  # [i] : true value
+        self.predprobs: List[float] = []  # [i] : model's pred. prob.
+
+    @enforce_types
+    def update(self, trueval: bool, predprob: float):
+        self.truevals.append(trueval)
+        self.predprobs.append(predprob)
+
+    @property
+    def predvals(self) -> List[bool]:
+        """@return [i] : model pred. value"""
+        return [p > 0.5 for p in self.predprobs]
+
+    @property
+    def n_correct(self) -> int:
+        return sum(tv == pv for tv, pv in zip(self.truevals, self.predvals))
+
+    @property
+    def n_trials(self) -> int:
+        return len(self.truevals)
+
+    @enforce_types
+    def accuracy(self) -> Tuple[float, float, float]:
+        n_correct, n_trials = self.n_correct, self.n_trials
+        acc_est = n_correct / n_trials
+        acc_l, acc_u = proportion_confint(count=n_correct, nobs=n_trials)
+        return (acc_est, acc_l, acc_u)
+
+    @enforce_types
+    def precision_recall_f1(self) -> Tuple[float, float, float]:
+        (precision, recall, f1, _) = precision_recall_fscore_support(
+            self.truevals,
+            self.predvals,
+            average="binary",
+            zero_division=0.0,
+        )
+        return (precision, recall, f1)
+
+    @enforce_types
+    def log_loss(self) -> float:
+        if min(self.truevals) == max(self.truevals):
+            return LOG_LOSS_ON_CONSTANT
+        return log_loss(self.truevals, self.predprobs)
+
+    @enforce_types
+    def perf_names(self) -> List[str]:
+        return PERF_NAMES
+
+    @enforce_types
+    def perf_values(self) -> List[float]:
+        perfs_list = (
+            list(self.accuracy()) + list(self.precision_recall_f1()) + [self.log_loss()]
+        )
+        assert len(perfs_list) == len(PERF_NAMES)
+        return perfs_list
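A minimal sketch of the intended TrueVsPred flow, mirroring the test above (accuracy() bounds come from statsmodels' proportion_confint):

from pdr_backend.aimodel.true_vs_pred import TrueVsPred

d = TrueVsPred()
d.update(trueval=True, predprob=0.6)   # predprob > 0.5 reads as "up": correct
d.update(trueval=False, predprob=0.3)  # predprob < 0.5 reads as "down": correct

assert d.predvals == [True, False]     # derived on the fly from predprobs
assert d.n_correct == 2 and d.n_trials == 2
acc_est, acc_l, acc_u = d.accuracy()   # point estimate plus confidence bounds
assert acc_est == 1.0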
diff --git a/pdr_backend/binmodel/binmodel.py b/pdr_backend/binmodel/binmodel.py
new file mode 100644
index 000000000..22adc5f4f
--- /dev/null
+++ b/pdr_backend/binmodel/binmodel.py
@@ -0,0 +1,32 @@
+from enforce_typing import enforce_types
+
+from pdr_backend.aimodel.aimodel import Aimodel
+from pdr_backend.binmodel.constants import UP, DOWN
+
+
+@enforce_types
+class Binmodel(dict):
+
+    def __init__(self, model_UP: Aimodel, model_DOWN: Aimodel):
+        self[UP] = model_UP
+        self[DOWN] = model_DOWN
+
+    def predict_next(self, X_test: dict) -> dict:
+        """
+        @arguments
+          X_test -- dict of {UP: X_test_UP_array, DOWN: X_test_DOWN_arr.}
+            It expects each array to have exactly 1 row, to predict from
+
+        @return
+          predprob -- dict of {UP: predprob_UP_float, DOWN: predprob_DOWN_float}
+        """
+        assert X_test[UP].shape[0] == 1
+        assert X_test[DOWN].shape[0] == 1
+
+        prob_UP = self[UP].predict_ptrue(X_test[UP])[0]
+        prob_DOWN = self[DOWN].predict_ptrue(X_test[DOWN])[0]
+
+        # ensure not np.float64. Why: applying ">" gives np.bool --> problems
+        prob_UP, prob_DOWN = float(prob_UP), float(prob_DOWN)
+
+        return {UP: prob_UP, DOWN: prob_DOWN}
diff --git a/pdr_backend/binmodel/binmodel_data.py b/pdr_backend/binmodel/binmodel_data.py
new file mode 100644
index 000000000..e0ca74eb1
--- /dev/null
+++ b/pdr_backend/binmodel/binmodel_data.py
@@ -0,0 +1,59 @@
+from typing import Dict, List
+
+from enforce_typing import enforce_types
+import numpy as np
+
+from pdr_backend.binmodel.constants import Dirn, UP, DOWN
+
+
+class BinmodelData1Dir:
+
+    @enforce_types
+    def __init__(
+        self,
+        X: np.ndarray,
+        ytrue: np.ndarray,
+        colnames: List[str],
+    ):
+        assert len(X.shape) == 2
+        assert len(ytrue.shape) == 1
+        assert X.shape[0] == ytrue.shape[0], (X.shape[0], ytrue.shape[0])
+
+        self.X: np.ndarray = X
+        self.ytrue: np.ndarray = ytrue
+        self.colnames: List[str] = colnames
+
+    @property
+    def st(self) -> int:
+        return 0
+
+    @property
+    def fin(self) -> int:
+        return self.X.shape[0] - 1
+
+    @property
+    def X_train(self) -> np.ndarray:
+        return self.X[self.st : self.fin, :]
+
+    @property
+    def ytrue_train(self) -> np.ndarray:
+        return self.ytrue[self.st : self.fin]
+
+    @property
+    def X_test(self) -> np.ndarray:
+        return self.X[self.fin : self.fin + 1, :]
+
+
+class BinmodelData(dict):
+    @enforce_types
+    def __init__(
+        self,
+        data_UP: BinmodelData1Dir,
+        data_DOWN: BinmodelData1Dir,
+    ):
+        self[UP] = data_UP
+        self[DOWN] = data_DOWN
+
+    @property
+    def X_test(self) -> Dict[Dirn, np.ndarray]:
+        return {UP: self[UP].X_test, DOWN: self[DOWN].X_test}
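The split convention in BinmodelData1Dir is worth spelling out: the newest row is the lone test sample, and everything before it trains the model. A small sketch:

import numpy as np
from pdr_backend.binmodel.binmodel_data import BinmodelData1Dir

X = np.array([[1.0], [2.0], [3.0]])  # 3 samples, 1 variable
ytrue = np.array([True, False, True])
d = BinmodelData1Dir(X, ytrue, ["x0"])

assert d.st == 0 and d.fin == 2      # fin = n_samples - 1
assert d.X_train.shape == (2, 1)     # rows [st, fin)
assert d.X_test.shape == (1, 1)      # row [fin], kept 2-D for predicting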
diff --git a/pdr_backend/binmodel/binmodel_data_factory.py b/pdr_backend/binmodel/binmodel_data_factory.py
new file mode 100644
index 000000000..572047117
--- /dev/null
+++ b/pdr_backend/binmodel/binmodel_data_factory.py
@@ -0,0 +1,175 @@
+from typing import List, Tuple
+
+from enforce_typing import enforce_types
+import numpy as np
+import polars as pl
+
+from pdr_backend.binmodel.binmodel_data import BinmodelData, BinmodelData1Dir
+from pdr_backend.cli.arg_feed import ArgFeed
+from pdr_backend.cli.arg_feeds import ArgFeeds
+from pdr_backend.aimodel.aimodel_data_factory import AimodelDataFactory
+from pdr_backend.ppss.ppss import PPSS
+from pdr_backend.ppss.predictoor_ss import PredictoorSS
+
+
+class BinmodelDataFactory:
+    @enforce_types
+    def __init__(self, ppss: PPSS):
+        self.ppss = ppss
+
+    @property
+    def pdr_ss(self) -> PredictoorSS:
+        return self.ppss.predictoor_ss
+
+    @property
+    def class_thr(self) -> float:
+        return self.pdr_ss.aimodel_data_ss.class_thr
+
+    @property
+    def predict_feed(self) -> ArgFeed:
+        return self.pdr_ss.predict_train_feedsets[0].predict
+
+    @enforce_types
+    def feed_variant(self, signal_str: str) -> ArgFeed:
+        assert signal_str in ["close", "high", "low"]
+        return self.predict_feed.variant_signal(signal_str)
+
+    @property
+    def max_n_train(self) -> int:
+        return self.pdr_ss.aimodel_data_ss.max_n_train
+
+    def set_max_n_train(self, n: int) -> None:
+        self.pdr_ss.aimodel_data_ss.set_max_n_train(n)
+
+    def testshift(self, test_i: int) -> int:
+        test_n = self.ppss.sim_ss.test_n
+        return AimodelDataFactory.testshift(test_n, test_i)
+
+    @enforce_types
+    def build(self, test_i: int, mergedohlcv_df: pl.DataFrame) -> BinmodelData:
+        """Construct sim model data"""
+        # main work
+        X, colnames = self._build_X(test_i, mergedohlcv_df)
+        ytrue_UP, ytrue_DOWN = self._build_ytrue(test_i, mergedohlcv_df)
+
+        # key check
+        assert (
+            X.shape[0] == ytrue_UP.shape[0] == ytrue_DOWN.shape[0]
+        ), "X and y must have same # samples"
+
+        # build final object, return
+        d_UP = BinmodelData1Dir(X, ytrue_UP, colnames)
+        d_DOWN = BinmodelData1Dir(X, ytrue_DOWN, colnames)
+        d = BinmodelData(d_UP, d_DOWN)
+        return d
+
+    @enforce_types
+    def _build_X(self, test_i: int, df) -> Tuple[np.ndarray, List[str]]:
+        """
+        @description
+          Build X for training/testing both UP and DOWN models.
+          (It could be same or different for both. Here, it's the same.)
+
+        @return
+          X -- 2d array [sample_i][var_i]
+          colnames -- list [var_i] of str
+        """
+        # base data
+        data_f = AimodelDataFactory(self.pdr_ss)
+        testshift = self.testshift(test_i)  # eg [99, 98, .., 2, 1, 0]
+
+        # main work
+        X, _, _, x_df, _ = data_f.create_xy(
+            df,
+            testshift,
+            self.feed_variant("low"),  # arbitrary
+            ArgFeeds(
+                [
+                    self.feed_variant("high"),
+                    self.feed_variant("low"),
+                    self.feed_variant("close"),
+                ]
+            ),
+        )
+        colnames = list(x_df.columns)
+
+        # We don't need to split X/y into train & test here.
+        # Rather, it happens inside BinmodelFactory.build()
+        # which calls BinmodelData1Dir.X_train(), ytrue_train(), and X_test()
+
+        # done
+        return (X, colnames)
+    @enforce_types
+    def _build_ytrue(self, test_i: int, df) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        @description
+          Build y for training/testing both UP and DOWN models.
+          (It's usually different for UP vs DOWN; and that's the case here.)
+
+        @return
+          ytrue_UP -- [sample_i] : bool -- outputs for training UP model
+          ytrue_DOWN -- [sample_i] : bool -- outputs for training DOWN model
+        """
+        # grab y_close/high/low from df
+        # y_close, etc are in order from youngest to oldest, ie t-1, t-2, ..
+        y_close = self._y_incl_extra_sample(test_i, df, "close")
+        y_high = self._y_incl_extra_sample(test_i, df, "high")
+        y_low = self._y_incl_extra_sample(test_i, df, "low")
+
+        # for 'next', truncate oldest entry (at end)
+        y_next_high, y_next_low = y_high[:-1], y_low[:-1]
+
+        # for 'cur' (prev), truncate newest entry (at front)
+        y_cur_close = y_close[1:]
+
+        # construct ytrue_UP/DOWN lists from comparing high/low to close+/-%
+        ytrue_UP_list, ytrue_DOWN_list = [], []
+        for cur_close, next_high, next_low in zip(y_cur_close, y_next_high, y_next_low):
+
+            # did the next high value go above the current close+% value?
+            thr_UP = self.thr_UP(cur_close)
+            ytrue_UP_list.append(next_high > thr_UP)
+
+            # did the next low value go below the current close-% value?
+            thr_DOWN = self.thr_DOWN(cur_close)
+            ytrue_DOWN_list.append(next_low < thr_DOWN)
+
+        # final conditioning, return
+        ytrue_UP = np.array(ytrue_UP_list)
+        ytrue_DOWN = np.array(ytrue_DOWN_list)
+        return (ytrue_UP, ytrue_DOWN)
+
+    @enforce_types
+    def _y_incl_extra_sample(self, test_i: int, df, signal_str: str) -> np.ndarray:
+        """
+        @description
+          We need an extra sample because
+          - each value of ytrue is computed from two different candles
+            (next_*, cur_*), which would naturally reduce total # samples by 1
+          - yet we still want the resulting ytrue to have same # samples as X
+          To get that extra sample, we temporarily set max_n_train += 1
+        """
+        assert signal_str in ["close", "high", "low"]
+
+        self.set_max_n_train(self.max_n_train + 1)
+        testshift = self.testshift(test_i)  # eg [99, 98, .., 2, 1, 0]
+
+        data_f = AimodelDataFactory(self.pdr_ss)
+        _, _, y, _, _ = data_f.create_xy(
+            df,
+            testshift,
+            self.feed_variant(signal_str),
+            ArgFeeds([self.feed_variant(signal_str)]),
+        )
+        assert len(y) == self.max_n_train + 1  # num_train + num_test(=1)
+        self.set_max_n_train(self.max_n_train - 1)
+        return y
+
+    @enforce_types
+    def thr_UP(self, cur_close: float) -> float:
+        return cur_close * (1 + self.class_thr)
+
+    @enforce_types
+    def thr_DOWN(self, cur_close: float) -> float:
+        return cur_close * (1 - self.class_thr)
diff --git a/pdr_backend/binmodel/binmodel_factory.py b/pdr_backend/binmodel/binmodel_factory.py
new file mode 100644
index 000000000..094d45118
--- /dev/null
+++ b/pdr_backend/binmodel/binmodel_factory.py
@@ -0,0 +1,42 @@
+from typing import Optional
+
+from enforce_typing import enforce_types
+
+from pdr_backend.aimodel.aimodel_factory import AimodelFactory
+from pdr_backend.binmodel.binmodel import Binmodel
+from pdr_backend.binmodel.binmodel_data import BinmodelData
+from pdr_backend.binmodel.constants import UP, DOWN
+from pdr_backend.ppss.aimodel_ss import AimodelSS
+
+
+class BinmodelFactory:
+    @enforce_types
+    def __init__(self, aimodel_ss: AimodelSS):
+        self.aimodel_ss = aimodel_ss
+
+    @enforce_types
+    def do_build(self, prev_model: Optional[Binmodel], test_i: int) -> bool:
+        """Update/train model?"""
+        n = self.aimodel_ss.train_every_n_epochs
+        return prev_model is None or test_i % n == 0
+
+    @enforce_types
+    def build(self, data: BinmodelData) -> Binmodel:
+        model_f = AimodelFactory(self.aimodel_ss)
+
+        model_UP = model_f.build(
+            data[UP].X_train,
+            data[UP].ytrue_train,
+            None,
+            None,
+        )
+
+        model_DOWN = model_f.build(
+            data[DOWN].X_train,
+            data[DOWN].ytrue_train,
+            None,
+            None,
+        )
+
+        binmodel = Binmodel(model_UP, model_DOWN)
+        return binmodel
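A toy check of how _build_ytrue's thresholds turn prices into labels, assuming class_thr = 0.002 (the test-dict default added below in aimodel_data_ss.py):

class_thr = 0.002                        # 0.2%
cur_close = 100.0
thr_UP = cur_close * (1 + class_thr)     # 100.2, what thr_UP() returns
thr_DOWN = cur_close * (1 - class_thr)   # 99.8, what thr_DOWN() returns

next_high, next_low = 100.5, 99.9
assert next_high > thr_UP                # so the ytrue_UP sample is True
assert not (next_low < thr_DOWN)         # so the ytrue_DOWN sample is False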
diff --git a/pdr_backend/binmodel/binmodel_prediction.py b/pdr_backend/binmodel/binmodel_prediction.py
new file mode 100644
index 000000000..bf0807be8
--- /dev/null
+++ b/pdr_backend/binmodel/binmodel_prediction.py
@@ -0,0 +1,94 @@
+from enforce_typing import enforce_types
+
+
+# pylint: disable=too-many-instance-attributes
+@enforce_types
+class BinmodelPrediction:
+    def __init__(
+        self,
+        conf_thr: float,
+        # UP model's probability that next high will go > prev close+%
+        prob_UP: float,
+        # DOWN model's probability that next low will go < prev close-%
+        prob_DOWN: float,
+    ):
+        # ensure not np.float64. Why: applying ">" gives np.bool --> problems
+        prob_UP, prob_DOWN = float(prob_UP), float(prob_DOWN)
+
+        # ppss.trader_ss.sim_confidence_threshold
+        self.conf_thr = conf_thr
+
+        # core attributes
+        self.prob_UP = prob_UP
+        self.prob_DOWN = prob_DOWN
+
+        # derived attributes
+        if self.models_in_conflict():
+            self.conf_up = 0.0
+            self.conf_down = 0.0
+            self.pred_up = False
+            self.pred_down = False
+            self.prob_up_MERGED = 0.5
+
+        elif self.prob_UP >= self.prob_DOWN:
+            self.conf_up = (prob_UP - 0.5) * 2.0  # to range [0,1]
+            self.conf_down = 0.0
+            self.pred_up = self.conf_up > self.conf_thr
+            self.pred_down = False
+            self.prob_up_MERGED = self.prob_UP
+
+        else:  # prob_DOWN > prob_UP
+            self.conf_up = 0.0
+            self.conf_down = (self.prob_DOWN - 0.5) * 2.0
+            self.pred_up = False
+            self.pred_down = self.conf_down > self.conf_thr
+            self.prob_up_MERGED = 1.0 - self.prob_DOWN
+
+    def do_trust_models(self) -> bool:
+        do_trust = _do_trust_models(
+            self.pred_up,
+            self.pred_down,
+            self.prob_UP,
+            self.prob_DOWN,
+        )
+        return do_trust
+
+    def models_in_conflict(self) -> bool:
+        return _models_in_conflict(self.prob_UP, self.prob_DOWN)
+
+
+@enforce_types
+def _do_trust_models(
+    pred_up: bool,
+    pred_down: bool,
+    prob_UP: float,
+    prob_DOWN: float,
+) -> bool:
+    """Do we trust the models enough to take prediction / trading action?"""
+    # preconditions
+    if not 0.0 <= prob_UP <= 1.0:
+        raise ValueError(prob_UP)
+    if not 0.0 <= prob_DOWN <= 1.0:
+        raise ValueError(prob_DOWN)
+    if pred_up and pred_down:
+        raise ValueError("can't have pred_up=True and pred_down=True")
+    if pred_up and prob_DOWN > prob_UP:
+        raise ValueError("can't have pred_up=True with prob_DOWN dominant")
+    if pred_down and prob_UP > prob_DOWN:
+        raise ValueError("can't have pred_down=True with prob_UP dominant")
+
+    # main test
+    return (pred_up or pred_down) and not _models_in_conflict(prob_UP, prob_DOWN)
+
+
+@enforce_types
+def _models_in_conflict(prob_UP: float, prob_DOWN: float) -> bool:
+    """Does the UP model conflict with the DOWN model?"""
+    # preconditions
+    if not 0.0 <= prob_UP <= 1.0:
+        raise ValueError(prob_UP)
+    if not 0.0 <= prob_DOWN <= 1.0:
+        raise ValueError(prob_DOWN)
+
+    # main test
+    return (prob_UP > 0.5 and prob_DOWN > 0.5) or (prob_UP < 0.5 and prob_DOWN < 0.5)
diff --git a/pdr_backend/binmodel/constants.py b/pdr_backend/binmodel/constants.py
new file mode 100644
index 000000000..fe90dd92c
--- /dev/null
+++ b/pdr_backend/binmodel/constants.py
@@ -0,0 +1,21 @@
+from enum import IntEnum
+
+from enforce_typing import enforce_types
+
+
+class Dirn(IntEnum):
+    UP = 1
+    DOWN = 2
+
+
+UP = Dirn.UP
+DOWN = Dirn.DOWN
+
+
+@enforce_types
+def dirn_str(dirn: Dirn):
+    if dirn == UP:
+        return "UP"
+    if dirn == DOWN:
+        return "DOWN"
+    raise ValueError(dirn)
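How the two probabilities merge is easiest to see with numbers; a sketch of BinmodelPrediction's branches:

from pdr_backend.binmodel.binmodel_prediction import BinmodelPrediction

# UP dominant and consistent: UP says 0.7, DOWN says 0.3
p = BinmodelPrediction(conf_thr=0.1, prob_UP=0.7, prob_DOWN=0.3)
assert not p.models_in_conflict()      # one > 0.5, the other < 0.5
assert p.pred_up                       # conf_up = (0.7 - 0.5) * 2 = 0.4 > 0.1
assert p.prob_up_MERGED == 0.7

# both > 0.5: each model claims its own direction, so they conflict
p2 = BinmodelPrediction(conf_thr=0.1, prob_UP=0.7, prob_DOWN=0.7)
assert p2.models_in_conflict()
assert p2.prob_up_MERGED == 0.5        # no information either way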
+@enforce_types
+def get_binmodel_data() -> BinmodelData:
+    (X_UP, ytrue_UP) = get_Xy_UP()
+    (X_DOWN, ytrue_DOWN) = get_Xy_DOWN()
+
+    colnames_UP = ["x0_high", "x1_high"]
+    colnames_DOWN = ["x0_low", "x1_low"]
+
+    data_UP = BinmodelData1Dir(X_UP, ytrue_UP, colnames_UP)
+    data_DOWN = BinmodelData1Dir(X_DOWN, ytrue_DOWN, colnames_DOWN)
+    data = BinmodelData(data_UP, data_DOWN)
+
+    return data
diff --git a/pdr_backend/binmodel/test/test_binmodel.py b/pdr_backend/binmodel/test/test_binmodel.py
new file mode 100644
index 000000000..cde491fb1
--- /dev/null
+++ b/pdr_backend/binmodel/test/test_binmodel.py
@@ -0,0 +1,23 @@
+from unittest.mock import Mock
+
+from enforce_typing import enforce_types
+import numpy as np
+
+from pdr_backend.aimodel.aimodel import Aimodel
+from pdr_backend.binmodel.binmodel import Binmodel
+from pdr_backend.binmodel.constants import UP, DOWN
+
+
+@enforce_types
+def test_binmodel():
+    model_UP = Mock(spec=Aimodel)
+    model_UP.predict_ptrue = Mock(return_value=np.array([0.2]))
+
+    model_DOWN = Mock(spec=Aimodel)
+    model_DOWN.predict_ptrue = Mock(return_value=np.array([0.8]))
+    model = Binmodel(model_UP, model_DOWN)
+
+    X_test = {UP: np.array([[1.0]]), DOWN: np.array([[2.0]])}
+
+    predprob = model.predict_next(X_test)
+    assert predprob == {UP: 0.2, DOWN: 0.8}
diff --git a/pdr_backend/binmodel/test/test_binmodel_constants.py b/pdr_backend/binmodel/test/test_binmodel_constants.py
new file mode 100644
index 000000000..c4487fb73
--- /dev/null
+++ b/pdr_backend/binmodel/test/test_binmodel_constants.py
@@ -0,0 +1,30 @@
+from enforce_typing import enforce_types
+import pytest
+
+from pdr_backend.binmodel.constants import Dirn, dirn_str, UP, DOWN
+
+
+@enforce_types
+def test_binmodel_constants__basic():
+    assert UP == Dirn.UP
+    assert DOWN == Dirn.DOWN
+
+    assert UP in Dirn
+    assert DOWN in Dirn
+
+
+@enforce_types
+def test_binmodel_constants__dirn_str():
+    assert dirn_str(UP) == "UP"
+    assert dirn_str(DOWN) == "DOWN"
+    with pytest.raises(TypeError):
+        _ = dirn_str(3)
+    with pytest.raises(TypeError):
+        _ = dirn_str("not an int")
+
+
+@enforce_types
+def test_binmodel_constants__can_sort():
+    # this is possible because Dirn inherits from IntEnum, vs Enum :)
+    assert sorted([Dirn.UP, Dirn.DOWN]) == [Dirn.UP, Dirn.DOWN]
+    assert sorted([UP, DOWN]) == [UP, DOWN]
diff --git a/pdr_backend/binmodel/test/test_binmodel_data.py b/pdr_backend/binmodel/test/test_binmodel_data.py
new file mode 100644
index 000000000..66d6223a9
--- /dev/null
+++ b/pdr_backend/binmodel/test/test_binmodel_data.py
@@ -0,0 +1,60 @@
+from enforce_typing import enforce_types
+from numpy.testing import assert_array_equal
+
+from pdr_backend.binmodel.binmodel_data import BinmodelData, BinmodelData1Dir
+from pdr_backend.binmodel.constants import Dirn, UP, DOWN
+from pdr_backend.binmodel.test.resources import get_Xy_UP, get_Xy_DOWN
+
+
+@enforce_types
+def test_binmodel_data_1dir():
+    # build data
+    (X_UP, ytrue_UP) = get_Xy_UP()
+
+    assert X_UP.shape == (4, 2)
+    assert ytrue_UP.shape == (4,)
+    ytrue_UP_train = ytrue_UP[:3]
+    data_UP = BinmodelData1Dir(X_UP, ytrue_UP, ["x0", "x1"])
+
+    # basic tests
+    assert_array_equal(X_UP, data_UP.X)
+    assert_array_equal(ytrue_UP, data_UP.ytrue)
+    assert data_UP.colnames == ["x0", "x1"]
+
+    # test properties
+    assert data_UP.st == 0
+    assert data_UP.fin == (4 - 1) == 3
+    assert_array_equal(data_UP.X_train, X_UP[0:3, :])
+    assert_array_equal(data_UP.X_test, X_UP[3 : 3 + 1, :])
+    assert_array_equal(data_UP.ytrue_train, ytrue_UP_train)
+
+
+@enforce_types
+def test_binmodel_data_both_dirs():
+    # build data
+    (X_UP, ytrue_UP) = get_Xy_UP()
+    (X_DOWN, ytrue_DOWN) = get_Xy_DOWN()
+    colnames_UP = ["x0_high", "x1_high"]
+    colnames_DOWN = ["x0_low", "x1_low"]
+
+    data_UP = BinmodelData1Dir(X_UP, ytrue_UP, colnames_UP)
+    data_DOWN = BinmodelData1Dir(X_DOWN, ytrue_DOWN, colnames_DOWN)
+    data = BinmodelData(data_UP, data_DOWN)
+
+    # basic tests
+    assert UP in data
+    assert DOWN in data
+    for key in data:
+        assert isinstance(key, Dirn)
+    assert isinstance(data[UP], BinmodelData1Dir)
+    assert isinstance(data[DOWN], BinmodelData1Dir)
+
+    assert_array_equal(X_UP, data[UP].X)
+    assert_array_equal(ytrue_UP, data[UP].ytrue)
+
+    assert_array_equal(X_DOWN, data[DOWN].X)
+    assert_array_equal(ytrue_DOWN, data[DOWN].ytrue)
+
+    assert sorted(data.X_test.keys()) == [UP, DOWN]
+    assert_array_equal(data.X_test[UP], data[UP].X_test)
+    assert_array_equal(data.X_test[DOWN], data[DOWN].X_test)
diff --git a/pdr_backend/binmodel/test/test_binmodel_data_factory.py b/pdr_backend/binmodel/test/test_binmodel_data_factory.py
new file mode 100644
index 000000000..0e032f24e
--- /dev/null
+++ b/pdr_backend/binmodel/test/test_binmodel_data_factory.py
@@ -0,0 +1,117 @@
+from enforce_typing import enforce_types
+import numpy as np
+from numpy.testing import assert_array_equal
+import polars as pl
+
+from pdr_backend.binmodel.binmodel_data import BinmodelData
+from pdr_backend.binmodel.binmodel_data_factory import BinmodelDataFactory
+from pdr_backend.binmodel.constants import UP, DOWN
+from pdr_backend.ppss.ppss import mock_ppss, PPSS
+from pdr_backend.ppss.predictoor_ss import PredictoorSS
+
+
+@enforce_types
+def test_binmodel_data_factory__basic():
+    # base data
+    data_f = _simple_factory()
+
+    # attributes
+    assert isinstance(data_f.ppss, PPSS)
+
+    # properties
+    assert isinstance(data_f.pdr_ss, PredictoorSS)
+    assert isinstance(data_f.class_thr, float)
+    assert 0.0 < data_f.class_thr < 1.0
+
+
+@enforce_types
+def test_binmodel_data_factory__testshift():
+    # base data
+    data_f = _simple_factory()
+    test_i = 3
+
+    # do work
+    test_n = data_f.ppss.sim_ss.test_n
+
+    # test
+    assert data_f.testshift(test_i) == test_n - test_i - 1
+
+
+@enforce_types
+def test_binmodel_data_factory__thr_UP__thr_DOWN():
+    # base data
+    data_f = _simple_factory()
+    cur_close = 8.0
+    class_thr: float = data_f.ppss.predictoor_ss.aimodel_data_ss.class_thr
+
+    # do work
+    thr_UP = data_f.thr_UP(cur_close)
+    thr_DOWN = data_f.thr_DOWN(cur_close)
+
+    # test
+    assert class_thr > 0.0
+    assert thr_DOWN < cur_close < thr_UP
+    assert thr_UP == cur_close * (1 + class_thr)
+    assert thr_DOWN == cur_close * (1 - class_thr)
+
+
+@enforce_types
+def _simple_factory() -> BinmodelDataFactory:
+    s = "binanceus ETH/USDT c 5m"
+    ppss = mock_ppss(feedset_list=[{"predict": s, "train_on": s}])
+    return BinmodelDataFactory(ppss)
+
+
+@enforce_types
+def test_binmodel_data_factory__build():
+    # =====================
+    # raw data
+    mergedohlcv_df = pl.DataFrame(
+        {
+            # every column in df is ordered from youngest to oldest
+            # i.e. [t-1, t-2, t-3, t-4, t-5]
+            "timestamp": [1, 2, 3, 4, 5],
+            "binanceus:ETH/USDT:high": [70.0, 69.0, 67.0, 68.0, 48.0],
+            "binanceus:ETH/USDT:low": [65.0, 61.0, 59.0, 62.0, 42.0],
+            "binanceus:ETH/USDT:close": [60.0, 66.0, 167.0, 64.0, 44.0],
+        }
+    )
+
+    # =====================
+    # configure the problem
+    feed_s = "binanceus ETH/USDT c 5m"
+    ppss = mock_ppss(feedset_list=[{"predict": feed_s, "train_on": feed_s}])
+    ppss.predictoor_ss.aimodel_data_ss.set_max_n_train(2)
+    ppss.predictoor_ss.aimodel_data_ss.set_autoregressive_n(1)
+    ppss.sim_ss.set_test_n(1)
+    test_i = 0
+
+    # =====================
+    # set targets
+    # no. rows of X = len(y) = max_n_train + max_n_test(=1) = 2 + 1 = 3
+    # no. cols of X = autoregressive_n * num_signals = 1 * 3 = 3
+    target_X_UP = np.array(
+        [  # h,    l,    c
+            [69.0, 61.0, 66.0],  # oldest
+            [67.0, 59.0, 167.0],
+            [68.0, 62.0, 64.0],  # newest
+        ]
+    )
+
+    # calculated from cur_close = [167, 64, 44],
+    # next_low = [61, 59, 62], and next_high = [67, 68, 48]
+    target_ytrue_UP = np.array([False, True, True])
+
+    # =====================
+    # main work
+    binmodel_data_factory = BinmodelDataFactory(ppss)
+    binmodel_data = binmodel_data_factory.build(test_i, mergedohlcv_df)
+
+    # =====================
+    # check results
+    X_UP = binmodel_data[UP].X
+    ytrue_UP = binmodel_data[UP].ytrue
+
+    assert_array_equal(X_UP, target_X_UP)
+    assert_array_equal(ytrue_UP, target_ytrue_UP)
diff --git a/pdr_backend/binmodel/test/test_binmodel_factory.py b/pdr_backend/binmodel/test/test_binmodel_factory.py
new file mode 100644
index 000000000..322685b4c
--- /dev/null
+++ b/pdr_backend/binmodel/test/test_binmodel_factory.py
@@ -0,0 +1,46 @@
+from unittest.mock import Mock
+
+from enforce_typing import enforce_types
+
+from pdr_backend.binmodel.binmodel import Binmodel
+from pdr_backend.binmodel.binmodel_factory import BinmodelFactory
+from pdr_backend.binmodel.test.resources import get_binmodel_data
+from pdr_backend.ppss.aimodel_ss import AimodelSS, aimodel_ss_test_dict
+
+
+@enforce_types
+def test_binmodel_factory__attributes():
+    f: BinmodelFactory = _get_binmodel_factory()
+    assert isinstance(f.aimodel_ss, AimodelSS)
+
+
+@enforce_types
+def test_binmodel_factory__do_build():
+    f: BinmodelFactory = _get_binmodel_factory()
+    f.aimodel_ss.set_train_every_n_epochs(13)
+
+    # case: no previous model
+    assert f.do_build(None, 0)
+
+    # case: have previous model; then on proper iter?
+    prev_model = Mock(spec=Binmodel)
+    assert f.do_build(prev_model, test_i=13 * 4)
+    assert not f.do_build(prev_model, test_i=13 * 4 + 1)
+
+
+@enforce_types
+def test_binmodel_factory__build():
+    f: BinmodelFactory = _get_binmodel_factory()
+
+    data = get_binmodel_data()
+    model = f.build(data)
+    assert isinstance(model, Binmodel)
+
+    p = model.predict_next(data.X_test)
+    assert p is not None  # don't test further; leave that to test_binmodel.py
+
+
+@enforce_types
+def _get_binmodel_factory() -> BinmodelFactory:
+    aimodel_ss = AimodelSS(aimodel_ss_test_dict())
+    return BinmodelFactory(aimodel_ss)
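The retrain cadence in do_build() reduces to a modulus check; a stand-alone sketch with train_every_n_epochs = 13, matching the test above (this do_build is a hypothetical stand-in, not the real method):

n = 13                                  # train_every_n_epochs

def do_build(prev_model, test_i):
    # same logic as BinmodelFactory.do_build
    return prev_model is None or test_i % n == 0

assert do_build(None, 5)                # no model yet: always build
assert do_build("model", 52)            # 52 % 13 == 0: scheduled rebuild
assert not do_build("model", 53)        # between epochs: reuse prev model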
diff --git a/pdr_backend/binmodel/test/test_binmodel_prediction.py b/pdr_backend/binmodel/test/test_binmodel_prediction.py
new file mode 100644
index 000000000..8bac7a6eb
--- /dev/null
+++ b/pdr_backend/binmodel/test/test_binmodel_prediction.py
@@ -0,0 +1,115 @@
+from enforce_typing import enforce_types
+import pytest
+from pytest import approx
+
+from pdr_backend.binmodel.binmodel_prediction import (
+    BinmodelPrediction,
+    _do_trust_models,
+    _models_in_conflict,
+)
+
+
+@enforce_types
+def test_binmodel_prediction_case1_models_in_conflict():
+    p = BinmodelPrediction(conf_thr=0.1, prob_UP=0.6, prob_DOWN=0.7)
+    assert p.conf_thr == 0.1
+    assert p.prob_UP == 0.6
+    assert p.prob_DOWN == 0.7
+
+    assert p.models_in_conflict()
+    assert p.conf_up == 0.0
+    assert p.conf_down == 0.0
+    assert not p.pred_up
+    assert not p.pred_down
+    assert p.prob_up_MERGED == 0.5
+
+
+@enforce_types
+def test_binmodel_prediction_case2_up_dominates():
+    p = BinmodelPrediction(conf_thr=0.1, prob_UP=0.6, prob_DOWN=0.3)
+    assert p.conf_thr == 0.1
+    assert p.prob_UP == 0.6
+    assert p.prob_DOWN == 0.3
+
+    assert not p.models_in_conflict()
+    assert p.prob_UP >= p.prob_DOWN
+    assert p.conf_up == approx((0.6 - 0.5) * 2.0) == approx(0.1 * 2.0) == 0.2
+    assert p.conf_down == 0.0
+    assert p.pred_up == (p.conf_up > p.conf_thr) == True  # 0.2 > 0.1
+    assert not p.pred_down
+    assert p.prob_up_MERGED == approx(0.6)
+
+    # setup like above, but now with higher conf thr, which it can't exceed
+    p = BinmodelPrediction(conf_thr=0.3, prob_UP=0.6, prob_DOWN=0.3)
+    assert not p.pred_up
+
+
+@enforce_types
+def test_binmodel_prediction_case3_down_dominates():
+    p = BinmodelPrediction(conf_thr=0.1, prob_UP=0.4, prob_DOWN=0.7)
+    assert p.conf_thr == 0.1
+    assert p.prob_UP == 0.4
+    assert p.prob_DOWN == 0.7
+
+    assert not p.models_in_conflict()
+    assert p.prob_DOWN > p.prob_UP
+    assert p.conf_up == 0.0
+    assert p.conf_down == approx((0.7 - 0.5) * 2.0) == approx(0.2 * 2.0) == 0.4
+    assert not p.pred_up
+    assert p.pred_down == (p.conf_down > p.conf_thr) == True  # 0.4 > 0.1
+    assert p.prob_up_MERGED == approx(1.0 - 0.7) == approx(0.3)
+
+    # setup like above, but now with higher conf thr, which it can't exceed
+    p = BinmodelPrediction(conf_thr=0.5, prob_UP=0.4, prob_DOWN=0.7)
+    assert not p.pred_down
+
+
+@enforce_types
+def test_do_trust_models_unhappy_path():
+    for tup in [
+        # out of range
+        (True, False, -1.4, 0.6),
+        (True, False, 1.4, 0.6),
+        (True, False, 0.4, -1.6),
+        (True, False, 0.4, 1.6),
+        # values conflict
+        (True, True, 0.4, 0.6),  # pred_up and pred_down
+        (True, False, 0.4, 0.6),  # pred_up and prob_DOWN > prob_UP
+        (False, True, 0.6, 0.4),  # pred_down and prob_UP > prob_DOWN
+    ]:
+        (pred_up, pred_down, prob_UP, prob_DOWN) = tup
+        with pytest.raises(ValueError):
+            _do_trust_models(pred_up, pred_down, prob_UP, prob_DOWN)
+            print(f"Should have failed on {tup}")
+
+
+@enforce_types
+def test_do_trust_models_happy_path():
+    assert _do_trust_models(True, False, 0.6, 0.4)
+    assert _do_trust_models(False, True, 0.4, 0.6)
+
+    assert not _do_trust_models(False, False, 0.4, 0.6)
+    assert not _do_trust_models(False, True, 0.4, 0.4)
+    assert not _do_trust_models(False, True, 0.6, 0.6)
+
+
+@enforce_types
+def test_models_in_conflict_unhappy_path():
+    for prob_UP, prob_DOWN in [
+        (-1.6, 0.6),
+        (1.6, 0.6),
+        (0.6, -1.6),
+        (0.6, 1.6),
+    ]:
+        with pytest.raises(ValueError):
+            _models_in_conflict(prob_UP, prob_DOWN)
+
+
+@enforce_types
+def test_models_in_conflict_happy_path():
+    assert _models_in_conflict(0.6, 0.6)
+    assert _models_in_conflict(0.4, 0.4)
+
+    assert not _models_in_conflict(0.6, 0.4)
+    assert not _models_in_conflict(0.4, 0.6)
+    assert not _models_in_conflict(0.5, 0.5)
diff --git a/pdr_backend/cli/arg_feed.py b/pdr_backend/cli/arg_feed.py
index 6717b6c6f..838365e32 100644
--- a/pdr_backend/cli/arg_feed.py
+++ b/pdr_backend/cli/arg_feed.py
@@ -93,6 +93,14 @@ def from_str(feed_str: str, do_verify: bool = True) -> "ArgFeed":
         feed = feeds[0]
         return feed
 
+    def variant_signal(self, signal_str: str) -> "ArgFeed":
+        return ArgFeed(
+            self.exchange,
+            signal_str,
+            self.pair,
+            self.timeframe,
+        )
+
 
 @enforce_types
 def _unpack_feeds_str(feeds_str: str) -> List[ArgFeed]:
diff --git a/pdr_backend/cli/cli_module.py b/pdr_backend/cli/cli_module.py
index 2ac532085..1c195523e 100644
--- a/pdr_backend/cli/cli_module.py
+++ b/pdr_backend/cli/cli_module.py
@@ -90,10 +90,9 @@ def do_sim(args, nested_args=None):
         network="development",
         nested_override_args=nested_args,
     )
-    feedset = ppss.predictoor_ss.predict_train_feedsets[0]
     if len(ppss.predictoor_ss.predict_train_feedsets) > 0:
         logger.warning("Multiple predict feeds provided, using the first one")
-    sim_engine = SimEngine(ppss, feedset)
+    sim_engine = SimEngine(ppss)
    sim_engine.run()
diff --git a/pdr_backend/ppss/aimodel_data_ss.py b/pdr_backend/ppss/aimodel_data_ss.py
index 94ac037ce..4933f1957 100644
--- a/pdr_backend/ppss/aimodel_data_ss.py
+++ b/pdr_backend/ppss/aimodel_data_ss.py
@@ -26,6 +26,7 @@ def __init__(self, d: dict):
         # test inputs
         self.validate_max_n_train(self.max_n_train)
         self.validate_autoregressive_n(self.autoregressive_n)
+        self.validate_class_thr(self.class_thr)
         self.validate_transform(self.transform)
 
     # --------------------------------
@@ -40,6 +41,11 @@ def validate_autoregressive_n(autoregressive_n: int):
         if not 0 < autoregressive_n < np.inf:
             raise ValueError(autoregressive_n)
 
+    @staticmethod
+    def validate_class_thr(class_thr: float):
+        if not 0 <= class_thr <= 1.0:
+            raise ValueError(class_thr)
+
     @staticmethod
     def validate_transform(transform: str):
         if transform not in TRANSFORM_OPTIONS:
@@ -63,6 +69,11 @@ def autoregressive_n(self) -> int:
         """
         return self.d["autoregressive_n"]
 
+    @property
+    def class_thr(self) -> float:
+        """eg 0.05 = 5%. UP is true if next high > close*(1+thr); DOWN if next low < close*(1-thr)"""
+        return self.d["class_thr"]
+
     @property
     def transform(self) -> str:
         """eg 'RelDiff'"""
@@ -78,6 +89,10 @@ def set_autoregressive_n(self, autoregressive_n: int):
         self.validate_autoregressive_n(autoregressive_n)
         self.d["autoregressive_n"] = autoregressive_n
 
+    def set_class_thr(self, class_thr: float):
+        self.validate_class_thr(class_thr)
+        self.d["class_thr"] = class_thr
+
     def set_transform(self, transform: str):
         self.validate_transform(transform)
         self.d["transform"] = transform
@@ -91,12 +106,14 @@ def set_transform(self, transform: str):
 def aimodel_data_ss_test_dict(
     max_n_train: Optional[int] = None,
     autoregressive_n: Optional[int] = None,
+    class_thr: Optional[float] = None,
     transform: Optional[str] = None,
 ) -> dict:
     """Use this function's return dict 'd' to construct AimodelDataSS(d)"""
     d = {
         "max_n_train": 7 if max_n_train is None else max_n_train,
         "autoregressive_n": 3 if autoregressive_n is None else autoregressive_n,
+        "class_thr": 0.002 if class_thr is None else class_thr,
         "transform": transform or "None",
     }
     return d
diff --git a/pdr_backend/ppss/test/test_aimodel_data_ss.py b/pdr_backend/ppss/test/test_aimodel_data_ss.py
index 2830dfe0c..6a68cd9c4 100644
--- a/pdr_backend/ppss/test/test_aimodel_data_ss.py
+++ b/pdr_backend/ppss/test/test_aimodel_data_ss.py
@@ -19,6 +19,7 @@ def test_aimodel_data_ss__default_values():
 
     assert ss.max_n_train == d["max_n_train"] == 7
     assert ss.autoregressive_n == d["autoregressive_n"] == 3
+    assert ss.class_thr == d["class_thr"] == 0.002
     assert ss.transform == d["transform"] == "None"
 
     # str
@@ -37,6 +38,15 @@ def test_aimodel_data_ss__nondefault_values():
     ss = AimodelDataSS(aimodel_data_ss_test_dict(autoregressive_n=13))
     assert ss.autoregressive_n == 13
 
+    ss = AimodelDataSS(aimodel_data_ss_test_dict(class_thr=0.06))
+    assert ss.class_thr == 0.06
+
+    ss = AimodelDataSS(aimodel_data_ss_test_dict(class_thr=0.0))
+    assert ss.class_thr == 0.0
+
+    ss = AimodelDataSS(aimodel_data_ss_test_dict(class_thr=1.0))
+    assert ss.class_thr == 1.0
+
     ss = AimodelDataSS(aimodel_data_ss_test_dict(transform="RelDiff"))
     assert ss.transform == "RelDiff"
 
@@ -49,6 +59,18 @@ def test_aimodel_data_ss__bad_inputs():
     with pytest.raises(TypeError):
         AimodelDataSS(aimodel_data_ss_test_dict(max_n_train=3.1))
 
+    with pytest.raises(ValueError):
+        AimodelDataSS(aimodel_data_ss_test_dict(class_thr=-0.1))
+
+    with pytest.raises(ValueError):
+        AimodelDataSS(aimodel_data_ss_test_dict(class_thr=1.1))
+
+    with pytest.raises(TypeError):  # floats only, for simplicity
+        AimodelDataSS(aimodel_data_ss_test_dict(class_thr=0))
+
+    with pytest.raises(TypeError):  # floats only, for simplicity
+        AimodelDataSS(aimodel_data_ss_test_dict(class_thr=1))
+
     with pytest.raises(ValueError):
         AimodelDataSS(aimodel_data_ss_test_dict(autoregressive_n=0))
 
@@ -86,6 +108,12 @@ def test_aimodel_data_ss__setters():
     with pytest.raises(ValueError):
         ss.set_autoregressive_n(-5)
 
+    # class_thr
+    ss.set_class_thr(0.34)
+    assert ss.class_thr == 0.34
+    with pytest.raises(ValueError):
+        ss.set_class_thr(-0.1)
+
     # transform
     ss.set_transform("RelDiff")
     assert ss.transform == "RelDiff"
diff --git a/pdr_backend/sim/calc_pdr_profit.py b/pdr_backend/sim/calc_pdr_profit.py
new file mode 100644
index 000000000..e7dfd44cb
--- /dev/null
+++ b/pdr_backend/sim/calc_pdr_profit.py
@@ -0,0 +1,32 @@
+from enforce_typing import enforce_types
+
+
+@enforce_types
+def calc_pdr_profit(
+    others_stake: float,
+    others_accuracy: float,
+    stake_up: float,
+    stake_down: float,
+    revenue: float,
+    true_up_close: bool,
+) -> float:
+    assert others_stake >= 0
+    assert 0.0 <= others_accuracy <= 1.0
+    assert stake_up >= 0.0
+    assert stake_down >= 0.0
+    assert revenue >= 0.0
+
+    amt_sent = stake_up + stake_down
+    others_stake_correct = others_stake * others_accuracy
+    tot_stake = others_stake + stake_up + stake_down
+    if true_up_close:
+        tot_stake_correct = others_stake_correct + stake_up
+        percent_to_me = stake_up / tot_stake_correct
+        amt_received = (revenue + tot_stake) * percent_to_me
+    else:
+        tot_stake_correct = others_stake_correct + stake_down
+        percent_to_me = stake_down / tot_stake_correct
+        amt_received = (revenue + tot_stake) * percent_to_me
+    pdr_profit_OCEAN = amt_received - amt_sent
+
+    return float(pdr_profit_OCEAN)
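To make the payout arithmetic concrete, a worked example (numbers purely illustrative):

from pdr_backend.sim.calc_pdr_profit import calc_pdr_profit

# others stake 1000 OCEAN at 60% accuracy; we stake 100 up, 0 down;
# sale revenue is 10 OCEAN; the epoch truly closed up
#   amt_sent             = 100 + 0        = 100
#   others_stake_correct = 1000 * 0.6     = 600
#   tot_stake            = 1000 + 100 + 0 = 1100
#   tot_stake_correct    = 600 + 100      = 700
#   amt_received         = (10 + 1100) * (100 / 700) ~ 158.57
#   profit               = 158.57 - 100   ~ 58.57
profit = calc_pdr_profit(1000.0, 0.6, 100.0, 0.0, 10.0, True)
assert round(profit, 2) == 58.57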
diff --git a/pdr_backend/sim/dash_plots/callbacks.py b/pdr_backend/sim/dash_plots/callbacks.py
index 95709d13a..4c7b91353 100644
--- a/pdr_backend/sim/dash_plots/callbacks.py
+++ b/pdr_backend/sim/dash_plots/callbacks.py
@@ -6,12 +6,14 @@
 from dash import Input, Output, State
 
+from pdr_backend.binmodel.constants import UP
 from pdr_backend.sim.dash_plots.util import get_figures_by_state
 from pdr_backend.sim.dash_plots.view_elements import (
     get_tabs,
     get_header_elements,
     get_waiting_template,
-    selected_var_checklist,
+    selected_var_UP_checklist,
+    selected_var_DOWN_checklist,
     get_tabs_component,
 )
 from pdr_backend.sim.sim_plotter import SimPlotter
@@ -42,32 +44,58 @@ def callback_func_start_stop_interval(value, disabled_state):
         return value == "finalState"
 
     @app.callback(
-        Output("selected_vars", "value"),
-        Input("aimodel_varimps", "clickData"),
-        State("selected_vars", "value"),
+        Output("selected_vars_UP", "value"),
+        Input("aimodel_varimps_UP", "clickData"),
+        State("selected_vars_UP", "value"),
     )
-    def update_selected_vars(clickData, selected_vars):
+    def update_selected_vars_UP(clickData, selected_vars_UP):
         if clickData is None:
-            return selected_vars
+            return selected_vars_UP
 
         label = clickData["points"][0]["y"]
-        if label in selected_vars:
-            selected_vars.remove(label)
+        if label in selected_vars_UP:
+            selected_vars_UP.remove(label)
         else:
-            selected_vars.append(label)
+            selected_vars_UP.append(label)
 
-        return selected_vars
+        return selected_vars_UP
+
+    @app.callback(
+        Output("selected_vars_DOWN", "value"),
+        Input("aimodel_varimps_DOWN", "clickData"),
+        State("selected_vars_DOWN", "value"),
+    )
+    def update_selected_vars_DOWN(clickData, selected_vars_DOWN):
+        if clickData is None:
+            return selected_vars_DOWN
+
+        label = clickData["points"][0]["y"]
+        if label in selected_vars_DOWN:
+            selected_vars_DOWN.remove(label)
+        else:
+            selected_vars_DOWN.append(label)
+
+        return selected_vars_DOWN
 
     @app.callback(
         Output("tabs-container", "children"),
         Output("header", "children"),
         Input("interval-component", "n_intervals"),
-        Input("selected_vars", "value"),
-        State("selected_vars", "value"),
+        Input("selected_vars_UP", "value"),
+        Input("selected_vars_DOWN", "value"),
+        State("selected_vars_UP", "value"),
+        State("selected_vars_DOWN", "value"),
         State("selected-tab", "data"),
     )
     # pylint: disable=unused-argument
-    def update_graph_live(n, selected_vars, selected_vars_old, selected_tab):
+    def update_graph_live(
+        n,
+        selected_vars_UP,
+        selected_vars_DOWN,
+        selected_vars_UP_old,
+        selected_vars_DOWN_old,
+        selected_tab,
+    ):
         try:
             run_id = app.run_id if app.run_id else SimPlotter.get_latest_run_id()
             sim_plotter = SimPlotter()
@@ -78,12 +106,20 @@ def update_graph_live(n, selected_vars, selected_vars_old, selected_tab):
             header = get_header_elements(run_id, st, ts)
 
             elements = []
-            state_options = sim_plotter.aimodel_plotdata.colnames
-            elements.append(selected_var_checklist(state_options, selected_vars_old))
+            state_options = sim_plotter.aimodel_plotdata[UP].colnames
+
+            cU = selected_var_UP_checklist(state_options, selected_vars_UP_old)
+            cD = selected_var_DOWN_checklist(state_options, selected_vars_DOWN_old)
+            elements += [cU, cD]
 
             timeout = 2 if ts != "final" or n < 2 else 10
+            figures = get_figures_by_state(
+                sim_plotter,
+                selected_vars_UP,
+                selected_vars_DOWN,
+                timeout,
+            )
 
-            figures = get_figures_by_state(sim_plotter, selected_vars, timeout=timeout)
             tabs = get_tabs(figures)
             selected_tab_value = selected_tab if selected_tab else tabs[0]["name"]
             elements = elements + [get_tabs_component(tabs, selected_tab_value)]
diff --git a/pdr_backend/sim/dash_plots/util.py b/pdr_backend/sim/dash_plots/util.py
index 69657098d..3f12fdac6 100644
--- a/pdr_backend/sim/dash_plots/util.py
+++ b/pdr_backend/sim/dash_plots/util.py
@@ -2,40 +2,66 @@
 # Copyright 2024 Ocean Protocol Foundation
 # SPDX-License-Identifier: Apache-2.0
 #
+from typing import List
+
+from enforce_typing import enforce_types
 import plotly.graph_objects as go
 import stopit
 
 from pdr_backend.aimodel import aimodel_plotter
-from pdr_backend.sim.dash_plots.view_elements import figure_names
+from pdr_backend.binmodel.constants import Dirn, UP, DOWN
+from pdr_backend.sim.dash_plots.view_elements import (
+    FIGURE_NAMES,
+    OTHER_FIGURES,
+    MODEL_RESPONSE_FIGURES,
+)
 from pdr_backend.sim.sim_plotter import SimPlotter
 
 
-def get_figures_by_state(sim_plotter: SimPlotter, selected_vars, timeout=2):
-    figures = {}
+@enforce_types
+def get_figures_by_state(
+    sim_plotter: SimPlotter,
+    selected_vars_UP: List[str],  # UP model selected varnames
+    selected_vars_DOWN: List[str],  # DOWN ""
+    timeout: int = 2,
+):
+    figs = {}
 
-    for key in figure_names:
-        if not key.startswith("aimodel"):
+    for fig_name in FIGURE_NAMES:
+        if fig_name in OTHER_FIGURES:
             with stopit.ThreadingTimeout(timeout) as context_manager:
-                fig = getattr(sim_plotter, f"plot_{key}")()
+                fig = getattr(sim_plotter, f"plot_{fig_name}")()
 
-            if context_manager.state == context_manager.TIMED_OUT:
-                fig = go.Figure()
-        else:
+        elif fig_name in MODEL_RESPONSE_FIGURES:
             with stopit.ThreadingTimeout(timeout) as context_manager:
-                if key in ["aimodel_response", "aimodel_varimps"]:
-                    sweep_vars = []
-                    for var in selected_vars:
-                        sweep_vars.append(
-                            sim_plotter.aimodel_plotdata.colnames.index(var)
-                        )
-                    sim_plotter.aimodel_plotdata.sweep_vars = sweep_vars
+                dirn = UP if "UP" in fig_name else DOWN
+                aimodel_plotdata = sim_plotter.aimodel_plotdata[dirn]
+                colnames = aimodel_plotdata.colnames
+                sel_vars = _sel_vars(dirn, selected_vars_UP, selected_vars_DOWN)
+                sel_Is = [colnames.index(var) for var in sel_vars]
+                aimodel_plotdata.sweep_vars = sel_Is
+                func_name = _func_name(fig_name)
+                func = getattr(aimodel_plotter, func_name)
+                fig = func(aimodel_plotdata)
+
+        else:
+            raise ValueError(fig_name)
+
+        if context_manager.state == context_manager.TIMED_OUT:
+            fig = go.Figure()
+
+        figs[fig_name] = fig
+
+    return figs
 
-                func_name = getattr(aimodel_plotter, f"plot_{key}")
-                fig = func_name(sim_plotter.aimodel_plotdata)
 
-            if context_manager.state == context_manager.TIMED_OUT:
-                fig = go.Figure()
+@enforce_types
+def _sel_vars(dirn: Dirn, sel_vars_UP, sel_vars_DOWN) -> List[str]:
+    if dirn == UP:
+        return sel_vars_UP
+    return sel_vars_DOWN
 
-    figures[key] = fig
-    return figures
 
+@enforce_types
+def _func_name(fig_name: str) -> str:
+    return f"plot_{fig_name}".replace("_UP", "").replace("_DOWN", "")
diff --git a/pdr_backend/sim/dash_plots/view_elements.py b/pdr_backend/sim/dash_plots/view_elements.py
index 035c546a6..3602119e3 100644
--- a/pdr_backend/sim/dash_plots/view_elements.py
+++ b/pdr_backend/sim/dash_plots/view_elements.py
@@ -6,23 +6,30 @@
 from enforce_typing import enforce_types
 from plotly.graph_objs import Figure
 
-figure_names = [
+OTHER_FIGURES = [
     "pdr_profit_vs_time",
     "pdr_profit_vs_ptrue",
     "trader_profit_vs_time",
     "trader_profit_vs_ptrue",
     "model_performance_vs_time",
-    "aimodel_varimps",
-    "aimodel_response",
-    "prediction_residuals_dist",
-    "prediction_residuals_other",
 ]
 
-empty_selected_vars = dcc.Checklist([], [], id="selected_vars")
+MODEL_RESPONSE_FIGURES = [
+    "aimodel_varimps_UP",
+    "aimodel_response_UP",
+    "aimodel_varimps_DOWN",
+    "aimodel_response_DOWN",
+]
+
+FIGURE_NAMES = OTHER_FIGURES + MODEL_RESPONSE_FIGURES
+
+EMPTY_SELECTED_VARS_UP = dcc.Checklist([], [], id="selected_vars_UP")
+EMPTY_SELECTED_VARS_DOWN = dcc.Checklist([], [], id="selected_vars_DOWN")
 
-empty_graphs_template = html.Div(
-    [dcc.Graph(figure=Figure(), id=key) for key in figure_names]
-    + [empty_selected_vars],
+EMPTY_GRAPHS_TEMPLATE = html.Div(
+    [dcc.Graph(figure=Figure(), id=name) for name in FIGURE_NAMES]
+    + [EMPTY_SELECTED_VARS_UP]
+    + [EMPTY_SELECTED_VARS_DOWN],
     style={"display": "none"},
 )
 
@@ -31,7 +38,7 @@ def get_waiting_template(err):
     return html.Div(
         [html.H2(f"Error/waiting: {err}", id="sim_state_text")]
-        + [empty_graphs_template],
+        + [EMPTY_GRAPHS_TEMPLATE],
         id="live-graphs",
     )
 
@@ -98,7 +105,7 @@
             "name": "Predictoor Profit",
             "components": [
                 single_graph(figures, "pdr_profit_vs_time", width="100%"),
-                single_graph(figures, "pdr_profit_vs_ptrue", width="50%"),
+                single_graph(figures, "pdr_profit_vs_ptrue", width="100%"),
             ],
             "className": "predictor_profit_tab",
         },
@@ -106,7 +113,7 @@
             "name": "Trader Profit",
             "components": [
                 single_graph(figures, "trader_profit_vs_time", width="100%"),
-                single_graph(figures, "trader_profit_vs_ptrue", width="50%"),
+                single_graph(figures, "trader_profit_vs_ptrue", width="100%"),
             ],
             "className": "trader_profit_tab",
         },
@@ -122,38 +129,42 @@
             "components": [
                 side_by_side_graphs(
                     figures,
-                    name1="aimodel_varimps",
-                    name2="aimodel_response",
+                    name1="aimodel_varimps_UP",
+                    name2="aimodel_response_UP",
                     height="100%",
                     width1="30%",
                     width2="70%",
-                )
-            ],
-            "className": "model_response_tab",
-        },
-        {
-            "name": "Model residuals",
-            "components": [
+                ),
                 side_by_side_graphs(
                     figures,
-                    name1="prediction_residuals_dist",
-                    name2="prediction_residuals_other",
+                    name1="aimodel_varimps_DOWN",
+                    name2="aimodel_response_DOWN",
                     height="100%",
-                    width1="60%",
-                    width2="40%",
+                    width1="30%",
+                    width2="70%",
                 ),
             ],
-            "className": "model_residuals_tab",
+            "className": "model_response_tab",
        },
    ]
 
 
 @enforce_types
-def selected_var_checklist(state_options, selected_vars_old):
+def selected_var_UP_checklist(state_options, selected_vars_UP_old):
+    return dcc.Checklist(
+        options=[{"label": var, "value": var} for var in state_options],
+        value=selected_vars_UP_old,
+        id="selected_vars_UP",
+        style={"display": "none"},
+    )
+
+
+@enforce_types
+def selected_var_DOWN_checklist(state_options, selected_vars_DOWN_old):
     return dcc.Checklist(
         options=[{"label": var, "value": var} for var in state_options],
-        value=selected_vars_old,
-        id="selected_vars",
+        value=selected_vars_DOWN_old,
+        id="selected_vars_DOWN",
         style={"display": "none"},
     )
 
@@ -191,7 +202,7 @@ def get_main_container():
     return html.Div(
         [
             html.Div(
-                empty_graphs_template,
+                EMPTY_GRAPHS_TEMPLATE,
                 id="header",
                 style={
                     "display": "flex",
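Both directions reuse a single plotter function; the _func_name helper added in dash_plots/util.py above simply prefixes "plot_" and strips the direction suffix. Repeating its one-liner to show the mapping:

def _func_name(fig_name: str) -> str:
    # same body as util._func_name above
    return f"plot_{fig_name}".replace("_UP", "").replace("_DOWN", "")

assert _func_name("aimodel_varimps_UP") == "plot_aimodel_varimps"
assert _func_name("aimodel_response_DOWN") == "plot_aimodel_response"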
diff --git a/pdr_backend/sim/multisim_engine.py b/pdr_backend/sim/multisim_engine.py
index 7fad59694..ce6d06b6f 100644
--- a/pdr_backend/sim/multisim_engine.py
+++ b/pdr_backend/sim/multisim_engine.py
@@ -10,7 +10,6 @@ import uuid
 from typing import List, Union
 
-import numpy as np
 import pandas as pd
 from enforce_typing import enforce_types
 
@@ -70,30 +69,15 @@ async def run_one(self, run_i: int):
         point_i = self.ss.point_i(run_i)
         logger.info("Multisim run_i=%s: start. Vals=%s", run_i, point_i)
         ppss = self.ppss_from_point(point_i)
-        feedset = ppss.predictoor_ss.predict_train_feedsets[0]
         multi_id = str(uuid.uuid4())
-        sim_engine = SimEngine(ppss, feedset, multi_id)
+        sim_engine = SimEngine(ppss, multi_id)
         sim_engine.disable_realtime_state()
         sim_engine.run()
         st = sim_engine.st
-        recent_metrics = st.recent_metrics()
-
-        # below, the "[1:]" is to avoid the first sample, which may be off
-        run_metrics = {
-            "acc_est": recent_metrics["acc_est"],
-            "acc_l": recent_metrics["acc_l"],
-            "acc_u": recent_metrics["acc_u"],
-            "f1": np.mean(st.aim.f1s),
-            "precision": np.mean(st.aim.precisions[1:]),
-            "recall": np.mean(st.aim.recalls[1:]),
-            "loss": np.mean(st.aim.losses[1:]),
-            "yerr": np.mean(st.aim.yerrs[1:]),
-            "pdr_profit_OCEAN": np.sum(st.pdr_profits_OCEAN),
-            "trader_profit_USD": np.sum(st.trader_profits_USD),
-        }
-        run_metrics_list = list(run_metrics.values())
+        metrics_values = st.final_metrics_values()
+        metrics_list = [metrics_values[name] for name in st.metrics_names()]
         async with lock:
-            self.update_csv(run_i, run_metrics_list, point_i)
+            self.update_csv(run_i, metrics_list, point_i)
         logger.info("Multisim run_i=%s: done", run_i)
         logger.info("Multisim engine: done. Output file: %s", self.csv_file)
@@ -117,7 +101,7 @@ def csv_header(self) -> List[str]:
         # put metrics first, because point_meta names/values can be superlong
         header = []
         header += ["run_number"]
-        header += SimState.recent_metrics_names()
+        header += SimState.metrics_names()
         header += list(self.ss.point_meta.keys())
         return header
 
@@ -126,7 +110,7 @@ def spaces(self) -> List[int]:
         buf = 3
         spaces = []
         spaces += [len("run_number") + buf]
-        spaces += [max(len(name), 6) + buf for name in SimState.recent_metrics_names()]
+        spaces += [max(len(name), 6) + buf for name in SimState.metrics_names()]
 
         for var, cand_vals in self.ss.point_meta.items():
             var_len = len(var)
@@ -159,7 +143,7 @@ def update_csv(
 
         @arguments
           run_i - it's run #i
-          run_metrics -- output of SimState.recent_metrics() for run #i
+          run_metrics -- output of SimState.recent_metrics_values() for run #i
           point_i -- value of each sweep param, for run #i
         """
         assert os.path.exists(self.csv_file), self.csv_file
diff --git a/pdr_backend/sim/sim_engine.py b/pdr_backend/sim/sim_engine.py
index f8800dda6..7099da605 100644
--- a/pdr_backend/sim/sim_engine.py
+++ b/pdr_backend/sim/sim_engine.py
@@ -1,32 +1,29 @@
-#
-# Copyright 2024 Ocean Protocol Foundation
-# SPDX-License-Identifier: Apache-2.0
-#
 import logging
 import os
 import uuid
-from typing import Optional
+from typing import Optional, Tuple
 
-import numpy as np
 import polars as pl
 from enforce_typing import enforce_types
-from sklearn.metrics import log_loss, precision_recall_fscore_support
-from statsmodels.stats.proportion import proportion_confint
 
-from pdr_backend.aimodel.aimodel import Aimodel
 from pdr_backend.aimodel.aimodel_data_factory import AimodelDataFactory
-from pdr_backend.aimodel.aimodel_factory import AimodelFactory
 from pdr_backend.aimodel.aimodel_plotdata import AimodelPlotdata
-from pdr_backend.aimodel.ycont_to_ytrue import ycont_to_ytrue
+from pdr_backend.binmodel.binmodel_data_factory import BinmodelDataFactory
+from pdr_backend.binmodel.binmodel_factory import BinmodelFactory
+from pdr_backend.binmodel.binmodel_prediction import BinmodelPrediction
+from pdr_backend.binmodel.constants import Dirn, UP, DOWN
 from pdr_backend.cli.arg_feed import ArgFeed
+from pdr_backend.cli.arg_feeds import ArgFeeds
 from pdr_backend.cli.arg_timeframe import ArgTimeframe
-from pdr_backend.cli.predict_train_feedsets import PredictTrainFeedset
 from pdr_backend.lake.ohlcv_data_factory import OhlcvDataFactory
 from pdr_backend.ppss.ppss import PPSS
+from pdr_backend.ppss.predictoor_ss import PredictoorSS
+from pdr_backend.sim.calc_pdr_profit import calc_pdr_profit
 from pdr_backend.sim.sim_logger import SimLogLine
 from pdr_backend.sim.sim_plotter import SimPlotter
+from pdr_backend.sim.sim_predictoor import SimPredictoor
+from pdr_backend.sim.sim_state import HistProfits, SimState
 from pdr_backend.sim.sim_trader import SimTrader
-from pdr_backend.sim.sim_state import SimState
 from pdr_backend.util.strutil import shift_one_earlier
 from pdr_backend.util.time_types import UnixTimeMs
disabled by calling disable_realtime_state() self.do_state_updates = True self.st = SimState() - self.trader = SimTrader(ppss, self.predict_feed) + self.sim_predictoor = SimPredictoor(ppss.predictoor_ss) + self.sim_trader = SimTrader(ppss) self.sim_plotter = SimPlotter() @@ -63,13 +59,33 @@ def __init__( else: self.multi_id = str(uuid.uuid4()) - self.model: Optional[Aimodel] = None + assert self.pdr_ss.aimodel_data_ss.transform == "None" + + @property + def pdr_ss(self) -> PredictoorSS: + return self.ppss.predictoor_ss @property def predict_feed(self) -> ArgFeed: - return self.predict_train_feedset.predict + return self.pdr_ss.predict_train_feedsets[0].predict + + @property + def timeframe(self) -> ArgTimeframe: + assert self.predict_feed.timeframe is not None + return self.predict_feed.timeframe + + @property + def others_stake(self) -> float: + return float(self.pdr_ss.others_stake.amt_eth) + + @property + def others_accuracy(self) -> float: + return float(self.pdr_ss.others_accuracy) + + @property + def revenue(self) -> float: + return self.pdr_ss.revenue.amt_eth - @enforce_types def _init_loop_attributes(self): filebase = f"out_{UnixTimeMs.now()}.txt" self.logfile = os.path.join(self.ppss.sim_ss.log_dir, filebase) @@ -79,174 +95,157 @@ def _init_loop_attributes(self): logger.addHandler(fh) self.st.init_loop_attributes() + logger.info("Initialize plot data.") self.sim_plotter.init_state(self.multi_id) - @enforce_types def run(self): logger.info("Start run") + + # initialize self._init_loop_attributes() - # main loop! + # ohclv data f = OhlcvDataFactory(self.ppss.lake_ss) mergedohlcv_df = f.get_mergedohlcv_df() - for test_i in range(self.ppss.sim_ss.test_n): - self.run_one_iter(test_i, mergedohlcv_df) + # main loop! + for iter_i in range(self.ppss.sim_ss.test_n): + self.run_one_iter(iter_i, mergedohlcv_df) + + # done logger.info("Done all iters.") # pylint: disable=too-many-statements# pylint: disable=too-many-statements - @enforce_types - def run_one_iter(self, test_i: int, mergedohlcv_df: pl.DataFrame): - ppss, pdr_ss, st = self.ppss, self.ppss.predictoor_ss, self.st - transform = pdr_ss.aimodel_data_ss.transform - stake_amt = pdr_ss.stake_amount.amt_eth - others_stake = pdr_ss.others_stake.amt_eth - revenue = pdr_ss.revenue.amt_eth - - testshift = ppss.sim_ss.test_n - test_i - 1 # eg [99, 98, .., 2, 1, 0] - data_f = AimodelDataFactory(pdr_ss) # type: ignore[arg-type] - predict_feed = self.predict_train_feedset.predict - train_feeds = self.predict_train_feedset.train_on - - # X, ycont, and x_df are all expressed in % change wrt prev candle - X, ytran, yraw, x_df, _ = data_f.create_xy( - mergedohlcv_df, - testshift, - predict_feed, - train_feeds, + def run_one_iter(self, iter_i: int, mergedohlcv_df: pl.DataFrame): + # base data + st = self.st + df = mergedohlcv_df + binmodel_data_f = BinmodelDataFactory(self.ppss) + testshift = binmodel_data_f.testshift(iter_i) + + # observe current price value, and related thresholds for classifier + cur_close = self._curval(df, testshift, "close") + cur_high = self._curval(df, testshift, "high") + cur_low = self._curval(df, testshift, "low") + y_thr_UP = binmodel_data_f.thr_UP(cur_close) + y_thr_DOWN = binmodel_data_f.thr_DOWN(cur_close) + + # build model + model_factory = BinmodelFactory(self.pdr_ss.aimodel_ss) + st.binmodel_data = binmodel_data_f.build(iter_i, df) + if model_factory.do_build(st.binmodel, iter_i): + st.binmodel = model_factory.build(st.binmodel_data) + + # make prediction + predprob = 
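The two classifier targets above (thr_UP on the next high, thr_DOWN on the next low) imply a band around the current close: the trueval comments read "prev close+%" and "prev close-%". A sketch under that assumption; the band-width parameter name (class_thr) is hypothetical, as the diff does not show BinmodelDataFactory's internals:

def thr_UP(cur_close: float, class_thr: float = 0.002) -> float:
    return cur_close * (1 + class_thr)   # assumed symmetric % band

def thr_DOWN(cur_close: float, class_thr: float = 0.002) -> float:
    return cur_close * (1 - class_thr)

cur_close, next_high, next_low = 100.0, 100.5, 99.9
trueval_UP = next_high > thr_UP(cur_close)     # did high exceed close + 0.2%?
trueval_DOWN = next_low < thr_DOWN(cur_close)  # did low drop below close - 0.2%?
assert trueval_UP and not trueval_DOWN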
self.st.binmodel.predict_next(st.binmodel_data.X_test) + + conf_thr = self.ppss.trader_ss.sim_confidence_threshold + binmodel_p = BinmodelPrediction(conf_thr, predprob[UP], predprob[DOWN]) + + # predictoor takes action (stake) + stake_up, stake_down = self.sim_predictoor.predict_iter(binmodel_p) + + # trader takes action (trade) + trader_profit_USD = self.sim_trader.trade_iter( + cur_close, + cur_high, + cur_low, + binmodel_p, ) - colnames = list(x_df.columns) - - st_, fin = 0, X.shape[0] - 1 - X_train, X_test = X[st_:fin, :], X[fin : fin + 1, :] - ytran_train, _ = ytran[st_:fin], ytran[fin : fin + 1] - cur_high, cur_low = data_f.get_highlow(mergedohlcv_df, predict_feed, testshift) - - cur_close = yraw[-2] - next_close = yraw[-1] + # observe next price values + next_close = self._nextval(df, testshift, "close") + next_high = self._nextval(df, testshift, "high") + next_low = self._nextval(df, testshift, "low") + + # observe price change prev -> next, and related changes for classifier + trueval_up_close = next_close > cur_close + trueval = { + UP: next_high > y_thr_UP, # did next high go > prev close+% ? + DOWN: next_low < y_thr_DOWN, # did next low go < prev close-% ? + } + + # calc predictoor profit + pdr_profit_OCEAN = calc_pdr_profit( + self.others_stake, + self.others_accuracy, + stake_up, + stake_down, + self.revenue, + trueval_up_close, + ) - if transform == "None": - y_thr = cur_close - else: # transform = "RelDiff" - y_thr = 0.0 - ytrue = ycont_to_ytrue(ytran, y_thr) + # update state + st.update(trueval, predprob, pdr_profit_OCEAN, trader_profit_USD) + + # log + ut = self._calc_ut(df, testshift) + SimLogLine(self.ppss, self.st, iter_i, ut).log() + + # plot + do_save_state, is_final_state = self._do_save_state(iter_i) + if do_save_state: + d = self._aimodel_plotdata() + st.iter_number = iter_i + self.sim_plotter.save_state(st, d, is_final_state) + + def _aimodel_plotdata(self) -> dict: + d_UP = self._aimodel_plotdata_1dir(UP) + d_DOWN = self._aimodel_plotdata_1dir(DOWN) + return {UP: d_UP, DOWN: d_DOWN} + + def _aimodel_plotdata_1dir(self, dirn: Dirn) -> AimodelPlotdata: + st = self.st + model = st.binmodel[dirn] + model_data = st.binmodel_data[dirn] + + colnames = model_data.colnames + colnames = [shift_one_earlier(c) for c in colnames] + + most_recent_x = model_data.X[-1, :] + slicing_x = most_recent_x + d = AimodelPlotdata( + model, + model_data.X_train, + model_data.ytrue_train, + None, + None, + model_data.colnames, + slicing_x, + ) + return d - ytrue_train, _ = ytrue[st_:fin], ytrue[fin : fin + 1] + def _curval(self, df, testshift: int, signal_str: str) -> float: + # float() so not np.float64, bc applying ">" gives np.bool -> problems + return float(self._yraw(df, testshift, signal_str)[-2]) - if ( - self.model is None - or self.st.iter_number % pdr_ss.aimodel_ss.train_every_n_epochs == 0 - ): - model_f = AimodelFactory(pdr_ss.aimodel_ss) - self.model = model_f.build(X_train, ytrue_train, ytran_train, y_thr) + def _nextval(self, df, testshift: int, signal_str: str) -> float: + # float() so not np.float64, bc applying ">" gives np.bool -> problems + return float(self._yraw(df, testshift, signal_str)[-1]) - # current time - recent_ut = UnixTimeMs(int(mergedohlcv_df["timestamp"].to_list()[-1])) - timeframe: ArgTimeframe = predict_feed.timeframe # type: ignore - ut = UnixTimeMs(recent_ut - testshift * timeframe.ms) - - # predict price direction - prob_up: float = self.model.predict_ptrue(X_test)[0] # in [0.0, 1.0] - prob_down: float = 1.0 - prob_up - conf_up = (prob_up - 0.5) * 
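The float() casts in _curval/_nextval above matter because comparing numpy scalars yields np.bool_, which is not a subclass of Python bool and can trip strict type checks downstream. A small demonstration:

import numpy as np

yraw = np.array([9.0, 10.0, 11.0, 12.0])  # oldest ... newest, at some testshift
cur_close = float(yraw[-2])   # last fully-known candle
next_close = float(yraw[-1])  # the candle being predicted
assert isinstance(next_close > cur_close, bool)     # plain bool after the cast
assert isinstance(yraw[-1] > yraw[-2], np.bool_)    # np.bool_ without it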
2.0 # to range [0,1] - conf_down = (prob_down - 0.5) * 2.0 # to range [0,1] - conf_threshold = self.ppss.trader_ss.sim_confidence_threshold - pred_up: bool = prob_up > 0.5 and conf_up > conf_threshold - pred_down: bool = prob_up < 0.5 and conf_down > conf_threshold - st.probs_up.append(prob_up) - - # predictoor: (simulate) submit predictions with stake - acct_up_profit = acct_down_profit = 0.0 - stake_up = stake_amt * prob_up - stake_down = stake_amt * (1.0 - prob_up) - acct_up_profit -= stake_up - acct_down_profit -= stake_down - - profit = self.trader.trade_iter( - cur_close, - pred_up, - pred_down, - conf_up, - conf_down, - cur_high, - cur_low, + def _yraw(self, mergedohlcv_df, testshift: int, signal_str: str): + assert signal_str in ["close", "high", "low"] + feed = self.predict_feed.variant_signal(signal_str) + aimodel_data_f = AimodelDataFactory(self.pdr_ss) + _, _, yraw, _, _ = aimodel_data_f.create_xy( + mergedohlcv_df, + testshift, + feed, + ArgFeeds([feed]), ) + return yraw - st.trader_profits_USD.append(profit) - - # observe true price - true_up = next_close > cur_close - st.ytrues.append(true_up) - - # update classifier metrics - n_correct = sum(np.array(st.ytrues) == np.array(st.ytrues_hat)) - n_trials = len(st.ytrues) - acc_est = n_correct / n_trials - acc_l, acc_u = proportion_confint(count=n_correct, nobs=n_trials) - (precision, recall, f1, _) = precision_recall_fscore_support( - st.ytrues, - st.ytrues_hat, - average="binary", - zero_division=0.0, - ) - if min(st.ytrues) == max(st.ytrues): - loss = 3.0 - else: - loss = log_loss(st.ytrues, st.probs_up) - yerr = 0.0 - if self.model.do_regr: - pred_ycont = self.model.predict_ycont(X_test)[0] - if transform == "None": - pred_next_close = pred_ycont - else: # transform = "RelDiff" - relchange = pred_ycont - pred_next_close = cur_close + relchange * cur_close - yerr = next_close - pred_next_close - - st.aim.update(acc_est, acc_l, acc_u, f1, precision, recall, loss, yerr) - - # track predictoor profit - tot_stake = others_stake + stake_amt - others_stake_correct = others_stake * pdr_ss.others_accuracy - if true_up: - tot_stake_correct = others_stake_correct + stake_up - percent_to_me = stake_up / tot_stake_correct - acct_up_profit += (revenue + tot_stake) * percent_to_me - else: - tot_stake_correct = others_stake_correct + stake_down - percent_to_me = stake_down / tot_stake_correct - acct_down_profit += (revenue + tot_stake) * percent_to_me - pdr_profit_OCEAN = acct_up_profit + acct_down_profit - st.pdr_profits_OCEAN.append(pdr_profit_OCEAN) - - SimLogLine(ppss, st, test_i, ut, acct_up_profit, acct_down_profit).log_line() - - save_state, is_final_state = self.save_state(test_i, self.ppss.sim_ss.test_n) - - if save_state: - colnames = [shift_one_earlier(colname) for colname in colnames] - most_recent_x = X[-1, :] - slicing_x = most_recent_x # plot about the most recent x - d = AimodelPlotdata( - self.model, - X_train, - ytrue_train, - ytran_train, - y_thr, - colnames, - slicing_x, - ) - self.st.iter_number = test_i - self.sim_plotter.save_state(self.st, d, is_final_state) + def _calc_ut(self, mergedohlcv_df, testshift: int) -> UnixTimeMs: + recent_ut = UnixTimeMs(int(mergedohlcv_df["timestamp"].to_list()[-1])) + ut = UnixTimeMs(recent_ut - testshift * self.timeframe.ms) + return ut def disable_realtime_state(self): self.do_state_updates = False - @enforce_types - def save_state(self, i: int, N: int): - "Save state on this iteration Y/N?" 
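Worked example of the _calc_ut arithmetic above: the simulated timestamp walks back from the newest candle by testshift periods, where testshift = test_n - iter_i - 1 (eg [99, 98, ..., 1, 0]), so iteration 0 maps to the oldest test candle and the final iteration to the newest:

def calc_ut(recent_ut_ms: int, testshift: int, timeframe_ms: int) -> int:
    return recent_ut_ms - testshift * timeframe_ms

test_n = 100
recent_ut_ms = 1_701_634_400_000
timeframe_ms = 5 * 60 * 1000  # 5-minute candles

# final iter: testshift = 0 -> the newest candle itself
assert calc_ut(recent_ut_ms, test_n - 99 - 1, timeframe_ms) == recent_ut_ms
# iter 0: testshift = 99 -> 99 candles back
assert calc_ut(recent_ut_ms, test_n - 0 - 1, timeframe_ms) == \
    recent_ut_ms - 99 * timeframe_ms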
+ def _do_save_state(self, i: int) -> Tuple[bool, bool]: + """For this iteration i, (a) save state? (b) is it final iteration?""" if self.ppss.sim_ss.is_final_iter(i): return True, True @@ -257,6 +256,7 @@ def save_state(self, i: int, N: int): # don't save first 5 iters -> not interesting # then save the next 5 -> "stuff's happening!" # then save every 5th iter, to balance "stuff's happening" w/ speed + N = self.ppss.sim_ss.test_n do_update = i >= 5 and (i < 10 or i % 5 == 0 or (i + 1) == N) if not do_update: return False, False diff --git a/pdr_backend/sim/sim_logger.py b/pdr_backend/sim/sim_logger.py index 68b7cc2d8..f3150b977 100644 --- a/pdr_backend/sim/sim_logger.py +++ b/pdr_backend/sim/sim_logger.py @@ -4,7 +4,6 @@ # import logging -import numpy as np from enforce_typing import enforce_types from pdr_backend.util.strutil import compactSmallNum @@ -13,48 +12,26 @@ @enforce_types -# pylint: disable=too-many-instance-attributes class SimLogLine: - def __init__(self, ppss, st, test_i, ut, acct_up_profit, acct_down_profit): + def __init__(self, ppss, st, test_i, ut): self.st = st - self.test_n = ppss.sim_ss.test_n self.test_i = test_i self.ut = ut - self.acct_up_profit = acct_up_profit - self.acct_down_profit = acct_down_profit - - self.n_correct = sum(np.array(st.ytrues) == np.array(st.ytrues_hat)) - self.n_trials = len(st.ytrues) - - for key, item in st.recent_metrics(extras=["prob_up"]).items(): - setattr(self, key, item) - # unused for now, but supports future configuration from ppss - self.format = "compact" - - def log_line(self): + def log(self): s = f"Iter #{self.test_i+1}/{self.test_n}" s += f" ut={self.ut}" s += f" dt={self.ut.to_timestr()[:-7]}" s += " â•‘" - s += f" prob_up={self.prob_up:.3f}" - s += " pdr_profit=" - s += f"{compactSmallNum(self.acct_up_profit)} up" - s += f" + {compactSmallNum(self.acct_down_profit)} down" - s += f" = {compactSmallNum(self.pdr_profit_OCEAN)} OCEAN" - s += f" (cumul {compactSmallNum(sum(self.st.pdr_profits_OCEAN))} OCEAN)" - s += " â•‘" - - s += f" Acc={self.n_correct:4d}/{self.n_trials:4d} " - s += f"= {self.acc_est*100:6.2f}% [{self.acc_l*100:5.1f}%, {self.acc_u*100:5.1f}%]" - s += f" prcsn={self.precision:.3f} recall={self.recall:.3f}" - s += f" f1={self.f1:.3f}" - s += f" loss={self.loss:.3f}" + pdr_profits = self.st.hist_profits.pdr_profits_OCEAN + s += f"pdr_profit={compactSmallNum(pdr_profits[-1])} OCEAN" + s += f" (cumul {compactSmallNum(sum(pdr_profits))} OCEAN)" s += " â•‘" - s += f" tdr_profit=${self.trader_profit_USD:6.2f}" - s += f" (cumul ${sum(self.st.trader_profits_USD):6.2f})" + trader_profits = self.st.hist_profits.trader_profits_USD + s += f" tdr_profit=${trader_profits[-1]:6.2f}" + s += f" (cumul ${sum(trader_profits):6.2f})" logger.info(s) diff --git a/pdr_backend/sim/sim_plotter.py b/pdr_backend/sim/sim_plotter.py index d9a5f0ebb..7b2f6bd15 100644 --- a/pdr_backend/sim/sim_plotter.py +++ b/pdr_backend/sim/sim_plotter.py @@ -15,20 +15,13 @@ import plotly.graph_objects as go from plotly.subplots import make_subplots -from pdr_backend.aimodel.aimodel_plotdata import AimodelPlotdata - -from pdr_backend.statutil.autocorrelation_plotdata import ( - AutocorrelationPlotdataFactory, -) -from pdr_backend.statutil.autocorrelation_plotter import add_corr_traces -from pdr_backend.statutil.dist_plotdata import DistPlotdataFactory -from pdr_backend.statutil.dist_plotter import add_pdf, add_cdf, add_nq +from pdr_backend.binmodel.constants import Dirn, dirn_str, UP, DOWN HEIGHT = 7.5 WIDTH = int(HEIGHT * 3.2) -# pylint: 
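The save cadence in _do_save_state above is easiest to see enumerated. This sketch covers the non-final path only; the real method returns (True, True) early when sim_ss.is_final_iter(i):

def do_save(i: int, N: int) -> bool:
    # skip first 5 iters, save iters 5..9, then every 5th, plus the last
    return i >= 5 and (i < 10 or i % 5 == 0 or (i + 1) == N)

N = 23
saved = [i for i in range(N) if do_save(i, N)]
assert saved == [5, 6, 7, 8, 9, 10, 15, 20, 22]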
disable=too-many-instance-attributes +# pylint: disable=too-many-instance-attributes,attribute-defined-outside-init class SimPlotter: @enforce_types def __init__( @@ -39,6 +32,7 @@ def __init__( self.multi_id = None @staticmethod + @enforce_types def get_latest_run_id(): if not os.path.exists("sim_state"): raise Exception( @@ -48,10 +42,12 @@ def get_latest_run_id(): return str(path).replace("sim_state/", "") @staticmethod + @enforce_types def get_all_run_names(): path = Path("sim_state").iterdir() return [str(p).replace("sim_state/", "") for p in path] + @enforce_types def load_state(self, multi_id): root_path = f"sim_state/{multi_id}" @@ -94,6 +90,7 @@ def load_state(self, multi_id): return self.st, "final" + @enforce_types def init_state(self, multi_id): files = glob.glob("sim_state/{multi_id}/*") @@ -104,9 +101,8 @@ def init_state(self, multi_id): os.makedirs(f"sim_state/{multi_id}") - def save_state( - self, sim_state, aimodel_plotdata: AimodelPlotdata, is_final: bool = False - ): + @enforce_types + def save_state(self, sim_state, aimodel_plotdata, is_final: bool = False): root_path = f"sim_state/{self.multi_id}" ts = ( datetime.now().strftime("%Y%m%d_%H%M%S.%f")[:-3] @@ -142,87 +138,178 @@ def save_state( @enforce_types def plot_pdr_profit_vs_time(self): - y = list(np.cumsum(self.st.pdr_profits_OCEAN)) + profits = self.st.hist_profits.pdr_profits_OCEAN + cum_profits = list(np.cumsum(profits)) ylabel = "predictoor profit (OCEAN)" - title = f"Predictoor profit vs time. Current: {y[-1]:.2f} OCEAN" + title = f"Pdr profit vs time. Current: {cum_profits[-1]:.2f} OCEAN" + title += f". Avg profit per iter: {np.average(profits):.4f} OCEAN" fig = make_subplots(rows=1, cols=1, subplot_titles=(title,)) - self._add_subplot_y_vs_time(fig, y, ylabel, "lines", row=1, col=1) + self._add_subplot_y_vs_time(fig, cum_profits, ylabel, "lines", row=1, col=1) return fig @enforce_types def plot_trader_profit_vs_time(self): - y = list(np.cumsum(self.st.trader_profits_USD)) + profits = self.st.hist_profits.trader_profits_USD + cum_profits = list(np.cumsum(profits)) ylabel = "trader profit (USD)" - title = f"Trader profit vs time. Current: ${y[-1]:.2f}" + title = f"Trader profit vs time. Current: ${cum_profits[-1]:.2f}" + title += f". Avg profit per iter: ${np.average(profits):.4f}" fig = make_subplots(rows=1, cols=1, subplot_titles=(title,)) - self._add_subplot_y_vs_time(fig, y, ylabel, "lines", row=1, col=1) + self._add_subplot_y_vs_time(fig, cum_profits, ylabel, "lines", row=1, col=1) return fig @enforce_types - def plot_pdr_profit_vs_ptrue(self): - x = self.st.probs_up - y = self.st.pdr_profits_OCEAN - fig = go.Figure( - go.Scatter( - x=x, - y=y, - mode="markers", - marker={"color": "#636EFA", "size": 2}, - ) + def _add_subplot_y_vs_time(self, fig, y, ylabel, mode, row, col): + assert mode in ["markers", "lines"], mode + line, marker = None, None + if mode == "markers": + marker = {"color": "black", "size": 2} + elif mode == "lines": + line = {"color": "#636EFA"} + + x = list(range(len(y))) + + fig.add_traces( + [ + # points: y vs time + go.Scatter( + x=x, + y=y, + mode=mode, + marker=marker, + line=line, + showlegend=False, + ), + # line: horizontal error = 0 + go.Scatter( + x=[min(x), max(x)], + y=[0.0, 0.0], + mode="lines", + line={"color": "grey", "dash": "dot"}, + showlegend=False, + ), + ], + rows=[row] * 2, + cols=[col] * 2, ) - fig.add_hline(y=0, line_dash="dot", line_color="grey") - title = f"Predictoor profit dist. 
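Data prep behind the profit-vs-time plots above: a cumulative sum drives the curve, and the new "Avg profit per iter" title term is a plain mean over per-iteration profits:

import numpy as np

profits = [1.0, -0.5, 2.0, 0.5]          # per-iteration profit
cum_profits = list(np.cumsum(profits))   # [1.0, 0.5, 2.5, 3.0]
assert cum_profits[-1] == 3.0            # "Current: 3.00 OCEAN"
assert np.average(profits) == 0.75       # "Avg profit per iter: 0.7500"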
avg={np.average(y):.2f} OCEAN" - fig.update_layout(title=title) - fig.update_xaxes(title="prob(up)") - fig.update_yaxes(title="pdr profit (OCEAN)") + fig.update_xaxes(title="time", row=row, col=col) + fig.update_yaxes(title=ylabel, row=row, col=col) return fig + @enforce_types + def plot_pdr_profit_vs_ptrue(self): + return self._plot_profit_vs_ptrue(is_pdr=True) + @enforce_types def plot_trader_profit_vs_ptrue(self): - x = self.st.probs_up - y = self.st.trader_profits_USD - fig = go.Figure( - go.Scatter( - x=x, - y=y, - mode="markers", - marker={"color": "#636EFA", "size": 2}, - ) - ) - fig.add_hline(y=0, line_dash="dot", line_color="grey") - title = f"trader profit dist. avg={np.average(y):.2f} USD" - fig.update_layout(title=title) - fig.update_xaxes(title="prob(up)") - fig.update_yaxes(title="trader profit (USD)") + return self._plot_profit_vs_ptrue(is_pdr=False) + + @enforce_types + def _plot_profit_vs_ptrue(self, is_pdr: bool): + titles = [self._profit_dist_title(is_pdr, dirn) for dirn in [UP, DOWN]] + + # make subplots + fig = make_subplots(rows=1, cols=2, subplot_titles=titles) + + # fill in subplots + self._add_subplot_profit_dist(fig, is_pdr, UP, row=1, col=1) + self._add_subplot_profit_dist(fig, is_pdr, DOWN, row=1, col=2) + + # global: set ticks + minor = {"ticks": "inside", "showgrid": True} + rng = [0.5, 1.0] + for col in [1, 2]: + fig.update_xaxes(minor=minor, range=rng, dtick=0.1, row=1, col=col) + fig.update_yaxes(minor=minor, row=1, col=col) + + # global: don't show legend + fig.update_layout(showlegend=False) return fig @enforce_types - def plot_model_performance_vs_time(self): - # set titles - aim = self.st.aim - s1 = f"accuracy = {aim.acc_ests[-1]*100:.2f}% " - s1 += f"[{aim.acc_ls[-1]*100:.2f}%, {aim.acc_us[-1]*100:.2f}%]" + def _profit_dist_title(self, is_pdr: bool, dirn: Dirn) -> str: + if is_pdr: + return f"Pdr profit dist'n vs prob({dirn_str(dirn)})" + + return f"Trader profit dist'n vs prob({dirn_str(dirn)})" - s2 = f"f1={aim.f1s[-1]:.4f}" - s2 += f" [recall={aim.recalls[-1]:.4f}" - s2 += f", precision={aim.precisions[-1]:.4f}]" + @enforce_types + def _add_subplot_profit_dist( + self, + fig, + is_pdr: bool, + dirn: Dirn, + row: int, + col: int, + ): + dirn_s = dirn_str(dirn) + x = np.array(self.st.true_vs_pred[dirn].predprobs) + if is_pdr: + y = np.array(self.st.hist_profits.pdr_profits_OCEAN) + else: + y = np.array(self.st.hist_profits.trader_profits_USD) + I = (x >= 0.5).nonzero()[0] + if len(I) > 0: + x, y = x[I], y[I] + fig.add_traces( + [ + # line: profit vs ptrue scatterplot + go.Scatter( + x=x, + y=y, + mode="markers", + marker={"color": "#636EFA", "size": 2}, + ), + # line: 0.0 horizontal + go.Scatter( + x=[min(x), max(x)], + y=[0.0, 0.0], + mode="lines", + name="", + line_dash="dot", + ), + ], + rows=[row] * 2, + cols=[col] * 2, + ) - s3 = f"log loss = {aim.losses[-1]:.4f}" + fig.update_xaxes(title=f"prob({dirn_s})", row=row, col=col) + + if is_pdr: + ytitle = "pdr profit (OCEAN)" + else: + ytitle = "trader profit (USD)" + fig.update_yaxes(title=ytitle, row=row, col=col) + + @enforce_types + def plot_model_performance_vs_time(self): + # set titles + titles = [ + self._acc_title(UP), + self._acc_title(DOWN), + self._f1_title(UP), + self._f1_title(DOWN), + self._loss_title(UP), + self._loss_title(DOWN), + ] # make subplots fig = make_subplots( rows=3, - cols=1, - subplot_titles=(s1, s2, s3), + cols=2, + subplot_titles=titles, vertical_spacing=0.08, ) # fill in subplots - self._add_subplot_accuracy_vs_time(fig, row=1) - 
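The per-direction profit scatter above keeps only iterations where that direction's model actually leaned its way (predprob >= 0.5), via a numpy index mask. A standalone check of that filtering step:

import numpy as np

x = np.array([0.3, 0.6, 0.8, 0.4])    # prob(UP) per iteration
y = np.array([-1.0, 2.0, 3.0, -0.5])  # profit per iteration
I = (x >= 0.5).nonzero()[0]           # indices where the model leaned UP
if len(I) > 0:
    x, y = x[I], y[I]
assert list(x) == [0.6, 0.8] and list(y) == [2.0, 3.0]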
self._add_subplot_f1_precision_recall_vs_time(fig, row=2) - self._add_subplot_log_loss_vs_time(fig, row=3) + self._add_subplot_accuracy_vs_time(fig, UP, row=1, col=1) + self._add_subplot_accuracy_vs_time(fig, DOWN, row=1, col=2) + self._add_subplot_f1_precision_recall_vs_time(fig, UP, row=2, col=1) + self._add_subplot_f1_precision_recall_vs_time(fig, DOWN, row=2, col=2) + self._add_subplot_log_loss_vs_time(fig, UP, row=3, col=1) + self._add_subplot_log_loss_vs_time(fig, DOWN, row=3, col=2) # global: set minor ticks minor = {"ticks": "inside", "showgrid": True} @@ -234,11 +321,15 @@ def plot_model_performance_vs_time(self): fig.update_layout( { "xaxis": {"matches": "x", "showticklabels": True}, - "xaxis2": {"matches": "x", "showticklabels": True}, "xaxis3": {"matches": "x", "showticklabels": True}, + "xaxis5": {"matches": "x", "showticklabels": True}, + "xaxis2": {"matches": "x2", "showticklabels": True}, + "xaxis4": {"matches": "x2", "showticklabels": True}, + "xaxis6": {"matches": "x2", "showticklabels": True}, } ) fig.update_xaxes(title="time", row=3, col=1) + fig.update_xaxes(title="time", row=3, col=2) # global: don't show legend fig.update_layout(showlegend=False) @@ -246,13 +337,34 @@ def plot_model_performance_vs_time(self): return fig @enforce_types - def _add_subplot_accuracy_vs_time(self, fig, row): - aim = self.st.aim - acc_ests = [100 * a for a in aim.acc_ests] + def _acc_title(self, dirn: Dirn) -> str: + hist_perfs, dirn_s = self.st.hist_perfs[dirn], dirn_str(dirn) + s = f"{dirn_s} accuracy={hist_perfs.acc_ests[-1]*100:.1f}% " + s += f"[{hist_perfs.acc_ls[-1]*100:.1f}%, {hist_perfs.acc_us[-1]*100:.1f}%]" + return s + + @enforce_types + def _f1_title(self, dirn: Dirn) -> str: + hist_perfs, dirn_s = self.st.hist_perfs[dirn], dirn_str(dirn) + s = f"{dirn_s} f1={hist_perfs.f1s[-1]:.2f}" + s += f" [recall={hist_perfs.recalls[-1]:.2f}" + s += f", prec'n={hist_perfs.precisions[-1]:.2f}]" + return s + + @enforce_types + def _loss_title(self, dirn: Dirn) -> str: + hist_perfs, dirn_s = self.st.hist_perfs[dirn], dirn_str(dirn) + s = f"{dirn_s} log loss = {hist_perfs.losses[-1]:.2f}" + return s + + @enforce_types + def _add_subplot_accuracy_vs_time(self, fig, dirn: Dirn, row: int, col: int): + hist_perfs = self.st.hist_perfs[dirn] + acc_ests = [100 * a for a in hist_perfs.acc_ests] df = pd.DataFrame(acc_ests, columns=["accuracy"]) - df["acc_ls"] = [100 * a for a in aim.acc_ls] - df["acc_us"] = [100 * a for a in aim.acc_us] - df["time"] = range(len(aim.acc_ests)) + df["acc_ls"] = [100 * a for a in hist_perfs.acc_ls] + df["acc_us"] = [100 * a for a in hist_perfs.acc_us] + df["time"] = range(len(hist_perfs.acc_ests)) fig.add_traces( [ @@ -291,17 +403,17 @@ def _add_subplot_accuracy_vs_time(self, fig, row): ), ], rows=[row] * 4, - cols=[1] * 4, + cols=[col] * 4, ) fig.update_yaxes(title_text="accuracy (%)", row=1, col=1) @enforce_types - def _add_subplot_f1_precision_recall_vs_time(self, fig, row): - aim = self.st.aim - df = pd.DataFrame(aim.f1s, columns=["f1"]) - df["precisions"] = aim.precisions - df["recalls"] = aim.recalls - df["time"] = range(len(aim.f1s)) + def _add_subplot_f1_precision_recall_vs_time(self, fig, dirn, row, col): + hist_perfs = self.st.hist_perfs[dirn] + df = pd.DataFrame(hist_perfs.f1s, columns=["f1"]) + df["precisions"] = hist_perfs.precisions + df["recalls"] = hist_perfs.recalls + df["time"] = range(len(hist_perfs.f1s)) fig.add_traces( [ @@ -339,130 +451,29 @@ def _add_subplot_f1_precision_recall_vs_time(self, fig, row): ), ], rows=[row] * 4, - cols=[1] * 4, + 
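The accuracy band plotted above is a binomial confidence interval per iteration; the code this diff removes computed it with statsmodels' proportion_confint, and the plotting side just scales to percent. A sketch of producing one such data point:

import pandas as pd
from statsmodels.stats.proportion import proportion_confint

n_correct, n_trials = 12, 20
acc_est = n_correct / n_trials
acc_l, acc_u = proportion_confint(count=n_correct, nobs=n_trials)

df = pd.DataFrame({"accuracy": [100 * acc_est],
                   "acc_ls": [100 * acc_l],
                   "acc_us": [100 * acc_u],
                   "time": [0]})
assert df["acc_ls"][0] <= df["accuracy"][0] <= df["acc_us"][0]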
cols=[col] * 4, ) fig.update_yaxes(title_text="f1, etc", row=2, col=1) @enforce_types - def _add_subplot_log_loss_vs_time(self, fig, row): - aim = self.st.aim - df = pd.DataFrame(aim.losses, columns=["log loss"]) - df["time"] = range(len(aim.losses)) + def _add_subplot_log_loss_vs_time(self, fig, dirn: Dirn, row: int, col: int): + hist_perfs = self.st.hist_perfs[dirn] + df = pd.DataFrame(hist_perfs.losses, columns=["log loss"]) + df["time"] = range(len(hist_perfs.losses)) fig.add_trace( - go.Scatter(x=df["time"], y=df["log loss"], mode="lines", name="log loss"), + go.Scatter( + x=df["time"], + y=df["log loss"], + mode="lines", + name="", + marker_color="#636EFA", + ), row=row, - col=1, + col=col, ) fig.update_yaxes(title_text="log loss", row=3, col=1) - @enforce_types - def plot_prediction_residuals_dist(self): - if _model_is_classif(self.st): - return _empty_fig("(Nothing to show because model is a classifier.)") - - # calc data - d = DistPlotdataFactory.build(self.st.aim.yerrs) - - # initialize subplots - s1, s2, s3 = "Residuals distribution", "", "" - fig = make_subplots( - rows=3, - cols=1, - subplot_titles=(s1, s2, s3), - vertical_spacing=0.02, - shared_xaxes=True, - ) - - # fill in subplots - add_pdf(fig, d, row=1, col=1) - add_cdf(fig, d, row=2, col=1) - add_nq(fig, d, row=3, col=1) - - # global: set minor ticks - minor = {"ticks": "inside", "showgrid": True} - fig.update_yaxes(minor=minor, row=1, col=1) - for row in [2, 3, 4, 5]: - fig.update_yaxes(minor=minor, row=row, col=1) - fig.update_xaxes(minor=minor, row=row, col=1) - - return fig - - @enforce_types - def plot_prediction_residuals_other(self): - if _model_is_classif(self.st): - return _empty_fig() - - # calc data - nlags = 10 # magic number alert # FIX ME: have spinner, like ARIMA feeds - d = AutocorrelationPlotdataFactory.build(self.st.aim.yerrs, nlags=nlags) - - # initialize subplots - s1 = "Residuals vs time" - s2 = "Residuals correlogram" - fig = make_subplots( - rows=2, - cols=1, - subplot_titles=(s1, s2), - vertical_spacing=0.12, - ) - - # fill in subplots - self._add_subplot_residual_vs_time(fig, row=1, col=1) - add_corr_traces( - fig, - d.acf_results, - row=2, - col=1, - ylabel="autocorrelation (ACF)", - ) - - return fig - - @enforce_types - def _add_subplot_residual_vs_time(self, fig, row, col): - y = self.st.aim.yerrs - self._add_subplot_y_vs_time(fig, y, "residual", "markers", row, col) - - @enforce_types - def _add_subplot_y_vs_time(self, fig, y, ylabel, mode, row, col): - assert mode in ["markers", "lines"], mode - line, marker = None, None - if mode == "markers": - marker = {"color": "black", "size": 2} - elif mode == "lines": - line = {"color": "#636EFA"} - - x = list(range(len(y))) - - fig.add_traces( - [ - # points: y vs time - go.Scatter( - x=x, - y=y, - mode=mode, - marker=marker, - line=line, - showlegend=False, - ), - # line: horizontal error = 0 - go.Scatter( - x=[min(x), max(x)], - y=[0.0, 0.0], - mode="lines", - line={"color": "grey", "dash": "dot"}, - showlegend=False, - ), - ], - rows=[row] * 2, - cols=[col] * 2, - ) - fig.update_xaxes(title="time", row=row, col=col) - fig.update_yaxes(title=ylabel, row=row, col=col) - - return fig - @enforce_types def file_age_in_seconds(pathname): @@ -470,22 +481,6 @@ def file_age_in_seconds(pathname): return time.time() - stat_result.st_mtime -@enforce_types -def _model_is_classif(sim_state) -> bool: - yerrs = sim_state.aim.yerrs - return min(yerrs) == max(yerrs) == 0.0 - - -@enforce_types -def _empty_fig(title=""): - fig = go.Figure() - w = "white" - 
fig.update_layout(title=title, paper_bgcolor=w, plot_bgcolor=w) - fig.update_xaxes(visible=False, showgrid=False, gridcolor=w, zerolinecolor=w) - fig.update_yaxes(visible=False, showgrid=False, gridcolor=w, zerolinecolor=w) - return fig - - @enforce_types def _get_latest_usable_state(root_path: str): all_state_files = glob.glob(f"{root_path}/st_*.pkl") diff --git a/pdr_backend/sim/sim_predictoor.py b/pdr_backend/sim/sim_predictoor.py new file mode 100644 index 000000000..d1a6bdda5 --- /dev/null +++ b/pdr_backend/sim/sim_predictoor.py @@ -0,0 +1,33 @@ +from typing import Tuple + +from enforce_typing import enforce_types + +from pdr_backend.binmodel.binmodel_prediction import BinmodelPrediction +from pdr_backend.ppss.predictoor_ss import PredictoorSS + + +class SimPredictoor: + @enforce_types + def __init__(self, pdr_ss: PredictoorSS): + self.pdr_ss = pdr_ss + + @property + def max_stake_amt(self) -> float: + return self.pdr_ss.stake_amount.amt_eth + + @enforce_types + def predict_iter(self, p: BinmodelPrediction) -> Tuple[float, float]: + """@return (stake_up, stake_down)""" + if not p.do_trust_models(): + stake_up = 0.0 + stake_down = 0.0 + elif p.prob_UP >= p.prob_DOWN: + stake_amt = self.max_stake_amt * p.conf_up + stake_up = stake_amt * p.prob_up_MERGED + stake_down = stake_amt * (1.0 - p.prob_up_MERGED) + else: # p.prob_DOWN > p.prob_UP + stake_amt = self.max_stake_amt * p.conf_down + stake_up = stake_amt * p.prob_up_MERGED + stake_down = stake_amt * (1.0 - p.prob_up_MERGED) + + return (stake_up, stake_down) diff --git a/pdr_backend/sim/sim_state.py b/pdr_backend/sim/sim_state.py index 371d6eea6..325202bf9 100644 --- a/pdr_backend/sim/sim_state.py +++ b/pdr_backend/sim/sim_state.py @@ -5,13 +5,26 @@ from typing import Dict, List, Optional, Union from enforce_typing import enforce_types +import numpy as np +from pdr_backend.aimodel.true_vs_pred import PERF_NAMES, TrueVsPred +from pdr_backend.binmodel.binmodel import Binmodel +from pdr_backend.binmodel.binmodel_data import BinmodelData +from pdr_backend.binmodel.constants import Dirn, dirn_str, UP, DOWN -@enforce_types -class AimodelMetrics: - # pylint: disable=too-many-instance-attributes - def __init__(self): +# ============================================================================= +# HistPerfs + + +# pylint: disable=too-many-instance-attributes +class HistPerfs: + """Historical performances, for 1 model dir'n (eg UP)""" + + @enforce_types + def __init__(self, dirn: Dirn): + self.dirn = dirn + # 'i' is iteration number i self.acc_ests: List[float] = [] # [i] : %-correct self.acc_ls: List[float] = [] # [i] : %-correct-lower @@ -23,9 +36,11 @@ def __init__(self): self.losses: List[float] = [] # [i] : log-loss - self.yerrs: List[float] = [] # [i] : regressor pred'n errs, w/ sign + @enforce_types + def update(self, perfs_list: list): + """perfs_list typically comes from TrueVsPred.perf_values()""" + acc_est, acc_l, acc_u, f1, precision, recall, loss = perfs_list - def update(self, acc_est, acc_l, acc_u, f1, precision, recall, loss, yerr): self.acc_ests.append(acc_est) self.acc_ls.append(acc_l) self.acc_us.append(acc_u) @@ -36,87 +51,169 @@ def update(self, acc_est, acc_l, acc_u, f1, precision, recall, loss, yerr): self.losses.append(loss) - self.yerrs.append(yerr) + @enforce_types + def metrics_names_instance(self) -> List[str]: + """@return e.g. 
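Worked example of SimPredictoor's stake split (new file above): scale the max stake by confidence in the dominant direction, then split the up/down legs by the merged prob-up. BinmodelPrediction's conf_up definition isn't shown in this diff; the sketch assumes the (prob - 0.5) * 2 mapping used by the sim_engine code this PR removes, and prob_up_MERGED is an illustrative value:

max_stake_amt = 100.0
prob_UP, prob_DOWN = 0.6, 0.4
conf_up = (prob_UP - 0.5) * 2.0   # assumed defn, per the removed sim_engine code
prob_up_MERGED = 0.6              # illustrative

stake_amt = max_stake_amt * conf_up     # UP dominates -> scale by conf_up
stake_up = stake_amt * prob_up_MERGED
stake_down = stake_amt * (1.0 - prob_up_MERGED)
assert stake_up + stake_down <= max_stake_amt
assert stake_up > stake_down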
['acc_est_UP', 'acc_l_UP', ..., 'loss_UP]""" + return HistPerfs.metrics_names_static(self.dirn) @staticmethod - def recent_metrics_names() -> List[str]: - return [ - "acc_est", - "acc_l", - "acc_u", - "f1", - "precision", - "recall", - "loss", - "yerr", - ] - - def recent_metrics(self) -> Dict[str, Union[int, float, None]]: - """Return most recent aimodel metrics""" - if not self.acc_ests: - return {key: None for key in AimodelMetrics.recent_metrics_names()} + def metrics_names_static(dirn) -> List[str]: + """@return e.g. ['acc_est_UP', 'acc_l_UP', ..., 'loss_UP]""" + return [f"{name}_{dirn_str(dirn)}" for name in PERF_NAMES] + @enforce_types + def recent_metrics_values(self) -> Dict[str, float]: + """Return most recent metrics""" + assert self.have_data(), "only works for >0 entries" + + s = dirn_str(self.dirn) return { - "acc_est": self.acc_ests[-1], - "acc_l": self.acc_ls[-1], - "acc_u": self.acc_us[-1], - "f1": self.f1s[-1], - "precision": self.precisions[-1], - "recall": self.recalls[-1], - "loss": self.losses[-1], - "yerr": self.yerrs[-1], + f"acc_est_{s}": self.acc_ests[-1], + f"acc_l_{s}": self.acc_ls[-1], + f"acc_u_{s}": self.acc_us[-1], + f"f1_{s}": self.f1s[-1], + f"precision_{s}": self.precisions[-1], + f"recall_{s}": self.recalls[-1], + f"loss_{s}": self.losses[-1], } + @enforce_types + def final_metrics_values(self) -> Dict[str, float]: + """Return *final* metrics, rather than most recent.""" + assert self.have_data(), "only works for >0 entries" -# pylint: disable=too-many-instance-attributes -@enforce_types -class SimState: - def __init__(self): - self.init_loop_attributes() - self.iter_number = 0 + s = dirn_str(self.dirn) + return { + f"acc_est_{s}": self.acc_ests[-1], + f"acc_l_{s}": self.acc_ls[-1], + f"acc_u_{s}": self.acc_us[-1], + f"f1_{s}": float(np.mean(self.f1s)), + f"precision_{s}": float(np.mean(self.precisions)), + f"recall_{s}": float(np.mean(self.recalls)), + f"loss_{s}": float(np.mean(self.losses)), + } + + @enforce_types + def have_data(self) -> bool: + return bool(self.acc_ests) - def init_loop_attributes(self): - # 'i' is iteration number i - # base data - self.ytrues: List[bool] = [] # [i] : was-truly-up - self.probs_up: List[float] = [] # [i] : predicted-prob-up +# ============================================================================= +# HistProfits - # aimodel metrics - self.aim = AimodelMetrics() +PROFIT_NAMES = ["pdr_profit_OCEAN", "trader_profit_USD"] - # profits + +class HistProfits: + def __init__(self): self.pdr_profits_OCEAN: List[float] = [] # [i] : predictoor-profit self.trader_profits_USD: List[float] = [] # [i] : trader-profit + @enforce_types + def update(self, pdr_profit_OCEAN: float, trader_profit_USD: float): + self.pdr_profits_OCEAN.append(pdr_profit_OCEAN) + self.trader_profits_USD.append(trader_profit_USD) + @staticmethod - def recent_metrics_names() -> List[str]: - return AimodelMetrics.recent_metrics_names() + [ - "pdr_profit_OCEAN", - "trader_profit_USD", - ] - - def recent_metrics( - self, extras: Optional[List[str]] = None - ) -> List[Union[int, float]]: - """Return most recent aimodel metrics + profit metrics""" - rm = self.aim.recent_metrics().copy() - rm.update( - { - "pdr_profit_OCEAN": self.pdr_profits_OCEAN[-1], - "trader_profit_USD": self.trader_profits_USD[-1], - } - ) + def metrics_names() -> List[str]: + return PROFIT_NAMES - if extras and "prob_up" in extras: - rm["prob_up"] = self.probs_up[-1] + @enforce_types + def recent_metrics_values(self) -> Dict[str, float]: + """Return most recent metrics""" + assert 
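Note the asymmetry in HistPerfs above: recent_metrics_values() reports the last point of every series, while final_metrics_values() keeps the last accuracy estimate and bounds but averages f1/precision/recall/loss over the whole run, a more robust summary than a single final sample. In miniature:

import numpy as np

losses = [6.0, 7.0]
recent_loss = losses[-1]             # 7.0 -- what recent_metrics_values reports
final_loss = float(np.mean(losses))  # 6.5 -- what final_metrics_values reports
assert recent_loss == 7.0 and final_loss == 6.5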
self.have_data(), "only works for >0 entries" - return rm + return { + "pdr_profit_OCEAN": self.pdr_profits_OCEAN[-1], + "trader_profit_USD": self.trader_profits_USD[-1], + } + + @enforce_types + def final_metrics_values(self) -> Dict[str, float]: + """Return *final* metrics, rather than most recent.""" + assert self.have_data(), "only works for >0 entries" + + return { + "pdr_profit_OCEAN": np.sum(self.pdr_profits_OCEAN), + "trader_profit_USD": np.sum(self.trader_profits_USD), + } - @property - def ytrues_hat(self) -> List[bool]: - return [p > 0.5 for p in self.probs_up] + @enforce_types + def have_data(self) -> bool: + return bool(self.pdr_profits_OCEAN) - @property - def n_correct(self) -> int: - return sum((p > 0.5) == t for p, t in zip(self.probs_up, self.ytrues)) + +# ============================================================================= +# SimState + + +class SimState: + @enforce_types + def __init__(self): + self.init_loop_attributes() + + @enforce_types + def init_loop_attributes(self): + self.iter_number = 0 + self.binmodel_data: Optional[BinmodelData] = None + self.binmodel: Optional[Binmodel] = None + self.true_vs_pred = {UP: TrueVsPred(), DOWN: TrueVsPred()} + self.hist_perfs = {UP: HistPerfs(UP), DOWN: HistPerfs(DOWN)} + self.hist_profits = HistProfits() + + @enforce_types + def update( + self, + trueval: dict, + predprob: dict, + pdr_profit_OCEAN: float, + trader_profit_USD: float, + ): + """ + @arguments + trueval -- dict of {UP: trueval_UP, DOWN: trueval_DOWN} + predprob -- dict of {UP: predprob_UP, DOWN: predprob_DOWN} + pdr_profit_OCEAN -- + trader_profit_USD -- + """ + self.true_vs_pred[UP].update(trueval[UP], predprob[UP]) + self.true_vs_pred[DOWN].update(trueval[DOWN], predprob[DOWN]) + + self.hist_perfs[UP].update(self.true_vs_pred[UP].perf_values()) + self.hist_perfs[DOWN].update(self.true_vs_pred[DOWN].perf_values()) + + self.hist_profits.update(pdr_profit_OCEAN, trader_profit_USD) + + @staticmethod + def metrics_names() -> List[str]: + return ( + HistPerfs.metrics_names_static(UP) + + HistPerfs.metrics_names_static(DOWN) + + HistProfits.metrics_names() + ) + + @enforce_types + def recent_metrics_values(self) -> Dict[str, Union[int, float, None]]: + """Return most recent aimodel metrics + profit metrics""" + metrics = {} + metrics.update(self.hist_perfs[UP].recent_metrics_values()) + metrics.update(self.hist_perfs[DOWN].recent_metrics_values()) + metrics.update(self.hist_profits.recent_metrics_values()) + return metrics + + @enforce_types + def final_metrics_values(self) -> Dict[str, Union[int, float, None]]: + """Return *final* metrics, rather than most recent.""" + metrics = {} + metrics.update(self.hist_perfs[UP].final_metrics_values()) + metrics.update(self.hist_perfs[DOWN].final_metrics_values()) + metrics.update(self.hist_profits.final_metrics_values()) + return metrics + + @enforce_types + def have_data(self) -> bool: + return ( + self.hist_perfs[UP].have_data() + and self.hist_perfs[DOWN].have_data() + and self.hist_profits.have_data() + ) diff --git a/pdr_backend/sim/sim_trader.py b/pdr_backend/sim/sim_trader.py index b5ec94792..2a80338f3 100644 --- a/pdr_backend/sim/sim_trader.py +++ b/pdr_backend/sim/sim_trader.py @@ -3,25 +3,28 @@ # SPDX-License-Identifier: Apache-2.0 # import logging + from enforce_typing import enforce_types -from pdr_backend.exchange.exchange_mgr import ExchangeMgr +from pdr_backend.binmodel.binmodel_prediction import BinmodelPrediction +from pdr_backend.exchange.exchange_mgr import ExchangeMgr logger = 
logging.getLogger("sim_trader") # pylint: disable=too-many-instance-attributes class SimTrader: - def __init__(self, ppss, predict_feed): + @enforce_types + def __init__(self, ppss): self.ppss = ppss - self.position_open = "" # long, short, "" - self.position_size = 0 # amount of tokens in position - self.position_worth = 0 # amount of USD in position - self.tp = 0.0 # take profit - self.sl = 0.0 # stop loss - self.tp_percent = self.ppss.trader_ss.take_profit_percent - self.sl_percent = self.ppss.trader_ss.stop_loss_percent + self.position_open: str = "" # long, short, "" + self.position_size: float = 0.0 # amount of tokens in position + self.position_worth: float = 0.0 # amount of USD in position + self.tp: float = 0.0 # take profit + self.sl: float = 0.0 # stop loss + self.tp_percent: float = self.ppss.trader_ss.take_profit_percent + self.sl_percent: float = self.ppss.trader_ss.stop_loss_percent mock = self.ppss.sim_ss.tradetype in ["histmock"] exchange_mgr = ExchangeMgr(self.ppss.exchange_mgr_ss) @@ -29,7 +32,7 @@ def __init__(self, ppss, predict_feed): "mock" if mock else ppss.predictoor_ss.exchange_str, ) - self.predict_feed = predict_feed + self.predict_feed = ppss.predictoor_ss.predict_train_feedsets[0].predict assert isinstance(self.tokcoin, str) assert isinstance(self.usdcoin, str) @@ -43,26 +46,48 @@ def usdcoin(self) -> str: """Return e.g. 'USDT'""" return self.predict_feed.pair.quote_str + @enforce_types def close_long_position(self, sell_price: float) -> float: tokcoin_amt_send = self.position_size usd_received = self._sell(sell_price, tokcoin_amt_send) self.position_open = "" profit = usd_received - self.position_worth - return profit + return float(profit) + @enforce_types def close_short_position(self, buy_price: float) -> float: usdcoin_amt_send = self.position_size * buy_price self._buy(buy_price, usdcoin_amt_send) self.position_open = "" profit = self.position_worth - usdcoin_amt_send - return profit + return float(profit) - # pylint: disable = too-many-return-statements + @enforce_types def trade_iter( self, cur_close: float, - pred_up, - pred_down, + high: float, + low: float, + p: BinmodelPrediction, + ) -> float: + profit_USD = self._trade_iter( + cur_close, + p.pred_up, + p.pred_down, + p.conf_up, + p.conf_down, + high, + low, + ) + return float(profit_USD) + + # pylint: disable=too-many-return-statements + @enforce_types + def _trade_iter( + self, + cur_close: float, + pred_up: bool, + pred_down: bool, conf_up: float, conf_down: float, high: float, @@ -78,13 +103,12 @@ def trade_iter( @arguments cur_close -- current price of the token + high -- highest price reached during the previous period + low -- lowest price reached during the previous period pred_up -- prediction that the price will go up pred_down -- prediction that the price will go down conf_up -- confidence in the prediction that the price will go up conf_down -- confidence in the prediction that the price will go down - high -- highest price reached during the period - low -- lowest price reached during the period - @return profit -- profit made by the trader in this iteration @@ -110,7 +134,7 @@ def trade_iter( self.position_size = tokcoin_amt_send self.tp = cur_close - (cur_close * self.tp_percent) self.sl = cur_close + (cur_close * self.sl_percent) - return 0 + return 0.0 # Check for take profit or stop loss if self.position_open == "long": @@ -133,7 +157,7 @@ def trade_iter( if not pred_down: return self.close_short_position(cur_close) - return 0 + return 0.0 @enforce_types def _buy(self, price: 
float, usdcoin_amt_send: float) -> float: @@ -165,7 +189,7 @@ def _buy(self, price: float, usdcoin_amt_send: float) -> float: self.usdcoin, ) - return tokcoin_amt_recd + return float(tokcoin_amt_recd) @enforce_types def _sell(self, price: float, tokcoin_amt_send: float) -> float: @@ -199,4 +223,4 @@ def _sell(self, price: float, tokcoin_amt_send: float) -> float: self.usdcoin, ) - return usdcoin_amt_recd + return float(usdcoin_amt_recd) diff --git a/pdr_backend/sim/test/test_calc_pdr_profit.py b/pdr_backend/sim/test/test_calc_pdr_profit.py new file mode 100644 index 000000000..404ded611 --- /dev/null +++ b/pdr_backend/sim/test/test_calc_pdr_profit.py @@ -0,0 +1,168 @@ +from enforce_typing import enforce_types +import pytest +from pytest import approx + +from pdr_backend.sim.calc_pdr_profit import calc_pdr_profit + + +@enforce_types +def test_calc_pdr_profit__happy_path(): + # true = up, guess = up (correct guess), others fully wrong + profit: float = calc_pdr_profit( + others_stake=2000.0, + others_accuracy=0.0, + stake_up=1000.0, + stake_down=0.0, + revenue=2.0, + true_up_close=True, + ) + assert profit == 2002.0 + + # true = down, guess = down (correct guess), others fully wrong + profit: float = calc_pdr_profit( + others_stake=2000.0, + others_accuracy=0.0, + stake_up=0.0, + stake_down=1000.0, + revenue=2.0, + true_up_close=False, + ) + assert profit == 2002.0 + + # true = up, guess = down (incorrect guess), others fully right + profit: float = calc_pdr_profit( + others_stake=2000.0, + others_accuracy=1.0, + stake_up=0.0, + stake_down=1000.0, + revenue=2.0, + true_up_close=True, + ) + assert profit == -1000.0 + + # true = down, guess = up (incorrect guess), others fully right + profit: float = calc_pdr_profit( + others_stake=2000.0, + others_accuracy=1.0, + stake_up=1000.0, + stake_down=0.0, + revenue=2.0, + true_up_close=False, + ) + assert profit == -1000.0 + + # true = up, guess = up AND down (half-correct), others fully wrong + # summary: I should get back all my stake $, plus stake $ of others + # calculations: + # - sent (by me) = stake_up + stake_down = 1000 + 100 = 1100 + # - tot_stake (by all) = others_stake + stake_up + stake_down + # = 1000 + 1000 + 100 = 2100 + # - tot_stake_correct (by all) = others_stake_correct + stake_up + # = 1000*0.0 + 1000 = 1000 + # - percent_to_me = stake_up / tot_stake_correct = 1000/1000 = 1.0 + # - rec'd (to me) = (revenue + tot_stake) * percent_to_me + # = (2 + 3100) * 1.0 = 3102 + # - profit = received - sent = 2102 - 1100 = 1002 + profit: float = calc_pdr_profit( + others_stake=1000.0, + others_accuracy=0.00, + stake_up=1000.0, + stake_down=100.0, + revenue=2.0, + true_up_close=True, + ) + assert profit == 1002.0 + + # true = up, guess = lots up & some down, others 30% accurate + # summary: + # calculations: + # - amt_sent = stake_up + stake_down = 1000 + 100 = 1100 + # - others_stake_correct = 1000 * 0.3 = 300 + # - tot_stake = others_stake + stake_up + stake_down + # = 1000 + 1000 + 100 = 2100 + # - tot_stake_correct = others_stake_correct + stake_up + # = 1000*0.30 + 1000 = 300 + 1000 = 1300 + # - percent_to_me = stake_up / tot_stake_correct + # = 1000/1300 = 0.7692307692307693 + # - amt_received = (revenue + tot_stake) * percent_to_me + # = (2 + 2100) * 0.769230 = 1616.9230769 + # - profit = received - sent = 1616.9230769 - 1100 = 516.923 + profit: float = calc_pdr_profit( + others_stake=1000.0, + others_accuracy=0.30, + stake_up=1000.0, + stake_down=100.0, + revenue=2.0, + true_up_close=True, + ) + assert profit == approx(516.923) + + 
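The happy-path tests above document the payout math precisely (one worked comment has a small slip: "(2 + 3100) * 1.0 = 3102" should read "(2 + 2100) * 1.0 = 2102", consistent with its own "received - sent = 2102 - 1100 = 1002"). Here is a sketch consistent with those worked examples; the real calc_pdr_profit in pdr_backend/sim/calc_pdr_profit.py may differ in details such as the zero-division guard assumed below:

def calc_pdr_profit(others_stake: float, others_accuracy: float,
                    stake_up: float, stake_down: float,
                    revenue: float, true_up_close: bool) -> float:
    assert others_stake >= 0.0 and revenue >= 0.0
    assert 0.0 <= others_accuracy <= 1.0
    assert stake_up >= 0.0 and stake_down >= 0.0

    amt_sent = stake_up + stake_down
    tot_stake = others_stake + amt_sent
    others_stake_correct = others_stake * others_accuracy
    my_stake_correct = stake_up if true_up_close else stake_down
    tot_stake_correct = others_stake_correct + my_stake_correct
    if tot_stake_correct == 0.0:
        return -amt_sent                       # assumed guard: nobody won
    percent_to_me = my_stake_correct / tot_stake_correct
    amt_received = (revenue + tot_stake) * percent_to_me
    return amt_received - amt_sent

assert calc_pdr_profit(2000.0, 0.0, 1000.0, 0.0, 2.0, True) == 2002.0
assert calc_pdr_profit(2000.0, 1.0, 0.0, 1000.0, 2.0, True) == -1000.0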
+@enforce_types +def test_calc_pdr_profit__unhappy_path(): + o_stake = 2000.0 + o_accuracy = 0.51 + stake_up = 1000.0 + stake_down = 100.0 + revenue = 15.0 + true_up_close = True + + with pytest.raises(AssertionError): + calc_pdr_profit( + -0.1, + o_accuracy, + stake_up, + stake_down, + revenue, + true_up_close, + ) + + with pytest.raises(AssertionError): + calc_pdr_profit( + o_stake, + -0.1, + stake_up, + stake_down, + revenue, + true_up_close, + ) + + with pytest.raises(AssertionError): + calc_pdr_profit( + o_stake, + +1.1, + stake_up, + stake_down, + revenue, + true_up_close, + ) + + with pytest.raises(AssertionError): + calc_pdr_profit( + o_stake, + o_accuracy, + -0.1, + stake_down, + revenue, + true_up_close, + ) + + with pytest.raises(AssertionError): + calc_pdr_profit( + o_stake, + o_accuracy, + stake_up, + -0.1, + revenue, + true_up_close, + ) + + with pytest.raises(AssertionError): + calc_pdr_profit( + o_stake, + o_accuracy, + stake_up, + stake_down, + -0.1, + true_up_close, + ) diff --git a/pdr_backend/sim/test/test_dash_plots.py b/pdr_backend/sim/test/test_dash_plots.py index 935415bb4..e2662e3a9 100644 --- a/pdr_backend/sim/test/test_dash_plots.py +++ b/pdr_backend/sim/test/test_dash_plots.py @@ -4,24 +4,29 @@ # from unittest.mock import Mock, patch +from enforce_typing import enforce_types from plotly.graph_objs import Figure +from pdr_backend.binmodel.constants import UP, DOWN from pdr_backend.sim.dash_plots.util import get_figures_by_state from pdr_backend.sim.dash_plots.view_elements import ( - get_tabs, - figure_names, + FIGURE_NAMES, get_header_elements, + get_tabs, get_waiting_template, - selected_var_checklist, + selected_var_UP_checklist, + selected_var_DOWN_checklist, ) from pdr_backend.sim.sim_plotter import SimPlotter +@enforce_types def test_get_waiting_template(): result = get_waiting_template("custom message") assert "custom message" in result.children[0].children +@enforce_types def test_get_header_elements(): st = Mock() st.iter_number = 5 @@ -37,21 +42,32 @@ def test_get_header_elements(): assert result[1].className == "finalState" +@enforce_types def test_get_tabs(): - figures = {key: Figure() for key in figure_names} + figures = {name: Figure() for name in FIGURE_NAMES} result = get_tabs(figures) for tab in result: assert "name" in tab assert "components" in tab -def test_selected_var_checklist(): - result = selected_var_checklist(["var1", "var2"], ["var1"]) - assert result.value == ["var1"] - assert result.options[0]["label"] == "var1" - assert result.options[1]["label"] == "var2" +@enforce_types +def test_selected_var_UP_checklist(): + result = selected_var_UP_checklist(["var_up1", "var_up2"], ["var_up1"]) + assert result.value == ["var_up1"] + assert result.options[0]["label"] == "var_up1" + assert result.options[1]["label"] == "var_up2" + + +@enforce_types +def test_selected_var_DOWN_checklist(): + result = selected_var_DOWN_checklist(["var_down1", "var_down2"], ["var_down1"]) + assert result.value == ["var_down1"] + assert result.options[0]["label"] == "var_down1" + assert result.options[1]["label"] == "var_down2" +@enforce_types def test_get_figures_by_state(): mock_sim_plotter = Mock(spec=SimPlotter) mock_sim_plotter.plot_pdr_profit_vs_time.return_value = Figure() @@ -59,22 +75,27 @@ def test_get_figures_by_state(): mock_sim_plotter.plot_pdr_profit_vs_ptrue.return_value = Figure() mock_sim_plotter.plot_trader_profit_vs_ptrue.return_value = Figure() mock_sim_plotter.plot_model_performance_vs_time.return_value = Figure() - 
mock_sim_plotter.plot_prediction_residuals_dist.return_value = Figure() - mock_sim_plotter.plot_prediction_residuals_other.return_value = Figure() - plotdata = Mock() - plotdata.colnames = ["var1", "var2"] + plotdata = {UP: Mock(), DOWN: Mock()} + plotdata[UP].colnames = ["var_up1", "var_up2"] + plotdata[DOWN].colnames = ["var_down1", "var_down2"] mock_sim_plotter.aimodel_plotdata = plotdata with patch( "pdr_backend.sim.dash_plots.util.aimodel_plotter" ) as mock_aimodel_plotter: + # *not* with UP or DOWN here, because the plot_*_() calls input Dirn mock_aimodel_plotter.plot_aimodel_response.return_value = Figure() mock_aimodel_plotter.plot_aimodel_varimps.return_value = Figure() - result = get_figures_by_state(mock_sim_plotter, ["var1", "var2"]) + figs = get_figures_by_state( + mock_sim_plotter, + ["var_up1", "var_up2"], + ["var_down1", "var_down2"], + ) - for key in figure_names: - assert key in result - assert isinstance(result[key], Figure) + assert sorted(figs.keys()) == sorted(FIGURE_NAMES) + for fig_name in FIGURE_NAMES: + assert fig_name in figs + assert isinstance(figs[fig_name], Figure) diff --git a/pdr_backend/sim/test/test_multisim_engine.py b/pdr_backend/sim/test/test_multisim_engine.py index 8fce0cad9..0f54110fa 100644 --- a/pdr_backend/sim/test/test_multisim_engine.py +++ b/pdr_backend/sim/test/test_multisim_engine.py @@ -28,7 +28,7 @@ def test_multisim1(tmpdir): multisim_engine.run() # csv ok? - target_columns = ["run_number"] + SimState.recent_metrics_names() + [param] + target_columns = ["run_number"] + SimState.metrics_names() + [param] assert multisim_engine.csv_header() == target_columns assert os.path.exists(multisim_engine.csv_file) df = multisim_engine.load_csv() diff --git a/pdr_backend/sim/test/test_sim_engine.py b/pdr_backend/sim/test/test_sim_engine_main.py similarity index 68% rename from pdr_backend/sim/test/test_sim_engine.py rename to pdr_backend/sim/test/test_sim_engine_main.py index 4da9f288d..56e82a392 100644 --- a/pdr_backend/sim/test/test_sim_engine.py +++ b/pdr_backend/sim/test/test_sim_engine_main.py @@ -1,7 +1,3 @@ -# -# Copyright 2024 Ocean Protocol Foundation -# SPDX-License-Identifier: Apache-2.0 -# import os import pytest @@ -9,9 +5,11 @@ from enforce_typing import enforce_types from selenium.common.exceptions import NoSuchElementException # type: ignore[import-untyped] -from pdr_backend.aimodel.aimodel import Aimodel +from pdr_backend.binmodel.binmodel import Binmodel from pdr_backend.cli.predict_train_feedsets import PredictTrainFeedsets from pdr_backend.ppss.lake_ss import LakeSS, lake_ss_test_dict +from pdr_backend.ppss.aimodel_ss import aimodel_ss_test_dict +from pdr_backend.ppss.aimodel_data_ss import aimodel_data_ss_test_dict from pdr_backend.ppss.ppss import PPSS, fast_test_yaml_str from pdr_backend.ppss.predictoor_ss import PredictoorSS, predictoor_ss_test_dict from pdr_backend.ppss.sim_ss import SimSS, sim_ss_test_dict @@ -22,7 +20,7 @@ @enforce_types # pylint: disable=unused-argument -def test_sim_engine(tmpdir, check_chromedriver, dash_duo): +def test_sim_engine_main(tmpdir, check_chromedriver, dash_duo): s = fast_test_yaml_str(tmpdir) ppss = PPSS(yaml_str=s, network="development") @@ -37,37 +35,45 @@ def test_sim_engine(tmpdir, check_chromedriver, dash_duo): # lake ss lake_dir = os.path.join(tmpdir, "parquet_data") - d = lake_ss_test_dict(lake_dir, feeds=feedsets.feed_strs) - assert "st_timestr" in d - d["st_timestr"] = "2023-06-18" - d["fin_timestr"] = "2023-06-19" - ppss.lake_ss = LakeSS(d) + lake_d = lake_ss_test_dict( + lake_dir, 
+ feeds=feedsets.feed_strs, + st_timestr="2023-06-18", + fin_timestr="2023-06-19", + ) + ppss.lake_ss = LakeSS(lake_d) # predictoor ss - d = predictoor_ss_test_dict(feedset_list) - assert "max_n_train" in d["aimodel_data_ss"] - assert "autoregressive_n" in d["aimodel_data_ss"] - assert "approach" in d["aimodel_ss"] - assert "train_every_n_epochs" in d["aimodel_ss"] - d["aimodel_data_ss"]["max_n_train"] = 20 - d["aimodel_data_ss"]["autoregressive_n"] = 1 - d["aimodel_ss"]["approach"] = "ClassifLinearRidge" - d["aimodel_ss"]["train_every_n_epochs"] = 2 - ppss.predictoor_ss = PredictoorSS(d) + pdr_d = predictoor_ss_test_dict( + feedset_list, + aimodel_data_ss_dict=aimodel_data_ss_test_dict( + max_n_train=20, + autoregressive_n=1, + ), + aimodel_ss_dict=aimodel_ss_test_dict( + approach="ClassifLinearRidge", + train_every_n_epochs=2, + ), + ) + ppss.predictoor_ss = PredictoorSS(pdr_d) # sim ss log_dir = os.path.join(tmpdir, "logs") - d = sim_ss_test_dict(log_dir, test_n=5) - ppss.sim_ss = SimSS(d) + sim_d = sim_ss_test_dict(log_dir, test_n=5) + ppss.sim_ss = SimSS(sim_d) # go - feedsets = ppss.predictoor_ss.predict_train_feedsets - sim_engine = SimEngine(ppss, feedsets[0]) + sim_engine = SimEngine(ppss) - assert sim_engine.model is None + assert sim_engine.st.binmodel is None sim_engine.run() - assert isinstance(sim_engine.model, Aimodel) + # basic test that engine ran + assert isinstance(sim_engine.st.binmodel, Binmodel) + + # basic tests for plots + if check_chromedriver is None: + return app = Dash("pdr_backend.sim.sim_dash") app.config["suppress_callback_exceptions"] = True app.run_id = sim_engine.multi_id @@ -94,7 +100,6 @@ def test_sim_engine(tmpdir, check_chromedriver, dash_duo): "trader_profit_tab": ["trader_profit_vs_time", "trader_profit_vs_ptrue"], "model_performance_tab": ["model_performance_vs_time"], "model_response_tab": ["aimodel_response", "aimodel_varimps"], - "model_residuals_tab": ["prediction_residuals_other"], } for tab_name, figures in tabs.items(): diff --git a/pdr_backend/sim/test/test_sim_logger.py b/pdr_backend/sim/test/test_sim_logger.py index 309a3ca0a..5a788ff6f 100644 --- a/pdr_backend/sim/test/test_sim_logger.py +++ b/pdr_backend/sim/test/test_sim_logger.py @@ -5,6 +5,8 @@ import os from unittest.mock import Mock +from enforce_typing import enforce_types + from pdr_backend.ppss.ppss import PPSS, fast_test_yaml_str from pdr_backend.ppss.sim_ss import SimSS, sim_ss_test_dict from pdr_backend.sim.sim_logger import SimLogLine @@ -12,7 +14,8 @@ from pdr_backend.util.time_types import UnixTimeMs -def test_compact_num(tmpdir, caplog): +@enforce_types +def test_sim_logger(tmpdir, caplog): s = fast_test_yaml_str(tmpdir) ppss = PPSS(yaml_str=s, network="development") @@ -22,6 +25,7 @@ def test_compact_num(tmpdir, caplog): st = Mock(spec=SimState) st.ytrues = [True, False, True, False, True] + st.recent_metrics = Mock() st.recent_metrics.return_value = { "pdr_profit_OCEAN": 1.0, "trader_profit_USD": 2.0, @@ -34,20 +38,17 @@ def test_compact_num(tmpdir, caplog): "recall": 0.2, "loss": 0.3, } - st.pdr_profits_OCEAN = [1.0, 2.0, 3.0, 4.0, 5.0] - st.trader_profits_USD = [2.0, 3.0, 4.0, 5.0, 6.0] + st.hist_profits = Mock() + st.hist_profits.pdr_profits_OCEAN = [1.0, 2.0, 3.0, 4.0, 5.0] + st.hist_profits.trader_profits_USD = [2.0, 3.0, 4.0, 5.0, 6.0] ut = UnixTimeMs(1701634400000) - log_line = SimLogLine(ppss, st, 1, ut, 0.5, 0.6) - log_line.log_line() - - assert "pdr_profit=0.50 up" in caplog.text - assert "prcsn=0.100" in caplog.text + log_line = SimLogLine(ppss, st, 1, ut) + 
log_line.log() + assert "pdr_profit=" in caplog.text + assert "tdr_profit=" in caplog.text assert f"Iter #2/{ppss.sim_ss.test_n}" in caplog.text - log_line = SimLogLine(ppss, st, 1, ut, 0.003, 0.4) - log_line.log_line() - - assert "pdr_profit=3.00e-3 up" in caplog.text - assert "prcsn=0.100" in caplog.text + log_line = SimLogLine(ppss, st, 1, ut) + log_line.log() assert f"Iter #2/{ppss.sim_ss.test_n}" in caplog.text diff --git a/pdr_backend/sim/test/test_sim_predictoor.py b/pdr_backend/sim/test/test_sim_predictoor.py new file mode 100644 index 000000000..9721c4a47 --- /dev/null +++ b/pdr_backend/sim/test/test_sim_predictoor.py @@ -0,0 +1,53 @@ +from enforce_typing import enforce_types + +from pdr_backend.binmodel.binmodel_prediction import BinmodelPrediction +from pdr_backend.ppss.predictoor_ss import PredictoorSS, predictoor_ss_test_dict +from pdr_backend.sim.sim_predictoor import SimPredictoor + + +@enforce_types +def test_sim_predictoor__attributes(): + sim_pdr = _sim_pdr() + assert isinstance(sim_pdr.pdr_ss, PredictoorSS) + + +@enforce_types +def test_sim_predictoor__properties(): + sim_pdr = _sim_pdr() + max_stake_amt = sim_pdr.pdr_ss.stake_amount.amt_eth + assert isinstance(max_stake_amt, float | int) + assert max_stake_amt > 0.0 + + +@enforce_types +def test_sim_predictoor__predict_iter(): + # base data + sim_pdr = _sim_pdr() + + # case 1: don't trust models + p = BinmodelPrediction(conf_thr=0.9, prob_UP=0.4, prob_DOWN=0.4) + assert not p.do_trust_models() + stake_up, stake_down = sim_pdr.predict_iter(p) + assert stake_up == stake_down == 0.0 + + # case 2: UP dominates + p = BinmodelPrediction(conf_thr=0.1, prob_UP=0.6, prob_DOWN=0.4) + assert p.do_trust_models() + stake_up, stake_down = sim_pdr.predict_iter(p) + assert 0.0 < stake_down < stake_up < 1.0 + assert (stake_up + stake_down) <= sim_pdr.max_stake_amt + + # case 3: DOWN dominates + p = BinmodelPrediction(conf_thr=0.1, prob_UP=0.4, prob_DOWN=0.6) + assert p.do_trust_models() + stake_up, stake_down = sim_pdr.predict_iter(p) + assert 0.0 < stake_up < stake_down < 1.0 + assert (stake_up + stake_down) <= sim_pdr.max_stake_amt + + +@enforce_types +def _sim_pdr() -> SimPredictoor: + pdr_d = predictoor_ss_test_dict() + pdr_ss = PredictoorSS(pdr_d) + sim_pdr = SimPredictoor(pdr_ss) + return sim_pdr diff --git a/pdr_backend/sim/test/test_sim_state.py b/pdr_backend/sim/test/test_sim_state.py index 0ac01ef08..d59919e8c 100644 --- a/pdr_backend/sim/test/test_sim_state.py +++ b/pdr_backend/sim/test/test_sim_state.py @@ -1,6 +1,262 @@ -# -# Copyright 2024 Ocean Protocol Foundation -# SPDX-License-Identifier: Apache-2.0 -# +from enforce_typing import enforce_types +import numpy as np +import pytest -# do nothing here, it's all tested in test_sim_engine.py +from pdr_backend.aimodel.true_vs_pred import PERF_NAMES, TrueVsPred +from pdr_backend.binmodel.constants import dirn_str, UP, DOWN +from pdr_backend.sim.sim_state import ( + HistPerfs, + HistProfits, + PROFIT_NAMES, + SimState, +) + +# ============================================================================= +# test HistPerfs + + +@enforce_types +@pytest.mark.parametrize("dirn", [UP, DOWN]) +def test_hist_perfs__basic_init(dirn): + # set data + dirn_s = dirn_str(dirn) + hist_perfs = HistPerfs(dirn) + + # test empty raw state + assert hist_perfs.acc_ests == hist_perfs.acc_ls == hist_perfs.acc_us == [] + assert hist_perfs.f1s == hist_perfs.precisions == hist_perfs.recalls == [] + assert hist_perfs.losses == [] + + # test names + assert len(PERF_NAMES) == 7 + target_names = 
diff --git a/pdr_backend/sim/test/test_sim_state.py b/pdr_backend/sim/test/test_sim_state.py
index 0ac01ef08..d59919e8c 100644
--- a/pdr_backend/sim/test/test_sim_state.py
+++ b/pdr_backend/sim/test/test_sim_state.py
@@ -1,6 +1,262 @@
-#
-# Copyright 2024 Ocean Protocol Foundation
-# SPDX-License-Identifier: Apache-2.0
-#
+from enforce_typing import enforce_types
+import numpy as np
+import pytest
 
-# do nothing here, it's all tested in test_sim_engine.py
+from pdr_backend.aimodel.true_vs_pred import PERF_NAMES, TrueVsPred
+from pdr_backend.binmodel.constants import dirn_str, UP, DOWN
+from pdr_backend.sim.sim_state import (
+    HistPerfs,
+    HistProfits,
+    PROFIT_NAMES,
+    SimState,
+)
+
+# =============================================================================
+# test HistPerfs
+
+
+@enforce_types
+@pytest.mark.parametrize("dirn", [UP, DOWN])
+def test_hist_perfs__basic_init(dirn):
+    # set data
+    dirn_s = dirn_str(dirn)
+    hist_perfs = HistPerfs(dirn)
+
+    # test empty raw state
+    assert hist_perfs.acc_ests == hist_perfs.acc_ls == hist_perfs.acc_us == []
+    assert hist_perfs.f1s == hist_perfs.precisions == hist_perfs.recalls == []
+    assert hist_perfs.losses == []
+
+    # test names
+    assert len(PERF_NAMES) == 7
+    target_names = _target_perfs_names(dirn_s)
+    assert hist_perfs.metrics_names_instance() == target_names
+    assert hist_perfs.metrics_names_instance()[0] == f"acc_est_{dirn_s}"
+    assert hist_perfs.metrics_names_instance()[-1] == f"loss_{dirn_s}"
+    assert HistPerfs.metrics_names_static(dirn) == target_names
+
+    # test can't call for metrics
+    assert not hist_perfs.have_data()
+    with pytest.raises(AssertionError):
+        _ = hist_perfs.recent_metrics_values()
+    with pytest.raises(AssertionError):
+        _ = hist_perfs.final_metrics_values()
+
+
+@enforce_types
+@pytest.mark.parametrize("dirn", [UP, DOWN])
+def test_hist_perfs__main(dirn):
+    # set data
+    dirn_s = dirn_str(dirn)
+    target_names = [f"{name}_{dirn_s}" for name in PERF_NAMES]
+    hist_perfs = HistPerfs(dirn)
+
+    perfs_list1 = list(np.arange(0.1, 7.1, 1.0))  # 0.1, 1.1, ..., 6.1
+    perfs_list2 = list(np.arange(0.2, 7.2, 1.0))  # 0.2, 1.2, ..., 6.2
+    hist_perfs.update(perfs_list1)
+    hist_perfs.update(perfs_list2)
+
+    # test raw state
+    assert hist_perfs.acc_ests == [0.1, 0.2]
+    assert hist_perfs.acc_ls == [1.1, 1.2]
+    assert hist_perfs.acc_us == [2.1, 2.2]
+    assert hist_perfs.f1s == [3.1, 3.2]
+    assert hist_perfs.precisions == [4.1, 4.2]
+    assert hist_perfs.recalls == [5.1, 5.2]
+    assert hist_perfs.losses == [6.1, 6.2]
+
+    # test can call for metrics
+    assert hist_perfs.have_data()
+
+    # test *recent* metrics
+    values = hist_perfs.recent_metrics_values()
+    assert len(values) == 7
+    assert sorted(values.keys()) == sorted(target_names)
+    assert f"acc_est_{dirn_s}" in values
+    assert values == {
+        f"acc_est_{dirn_s}": 0.2,
+        f"acc_l_{dirn_s}": 1.2,
+        f"acc_u_{dirn_s}": 2.2,
+        f"f1_{dirn_s}": 3.2,
+        f"precision_{dirn_s}": 4.2,
+        f"recall_{dirn_s}": 5.2,
+        f"loss_{dirn_s}": 6.2,
+    }
+
+    # test *final* metrics
+    values = hist_perfs.final_metrics_values()
+    assert len(values) == 7
+    assert sorted(values.keys()) == sorted(target_names)
+    assert f"acc_est_{dirn_s}" in values
+    assert values == {
+        f"acc_est_{dirn_s}": 0.2,
+        f"acc_l_{dirn_s}": 1.2,
+        f"acc_u_{dirn_s}": 2.2,
+        f"f1_{dirn_s}": np.mean([3.1, 3.2]),
+        f"precision_{dirn_s}": np.mean([4.1, 4.2]),
+        f"recall_{dirn_s}": np.mean([5.1, 5.2]),
+        f"loss_{dirn_s}": np.mean([6.1, 6.2]),
+    }
+
+
+@enforce_types
+def _target_perfs_names(dirn_s: str):
+    return [f"{name}_{dirn_s}" for name in PERF_NAMES]
+
+
+# =============================================================================
+# test HistProfits
+
+
+@enforce_types
+def test_hist_profits__basic_init():
+    # set data
+    hist_profits = HistProfits()
+
+    # test empty raw data
+    assert hist_profits.pdr_profits_OCEAN == []
+    assert hist_profits.trader_profits_USD == []
+
+    # test names
+    target_names = PROFIT_NAMES
+    names = HistProfits.metrics_names()
+    assert names == target_names
+
+    # test can't call for metrics
+    assert not hist_profits.have_data()
+    with pytest.raises(AssertionError):
+        _ = hist_profits.recent_metrics_values()
+    with pytest.raises(AssertionError):
+        _ = hist_profits.final_metrics_values()
+
+
+@enforce_types
+def test_hist_profits__update():
+    # set data
+    hist_profits = HistProfits()
+    hist_profits.update(2.1, 3.1)
+    hist_profits.update(2.2, 3.2)
+
+    # test raw values
+    assert hist_profits.pdr_profits_OCEAN == [2.1, 2.2]
+    assert hist_profits.trader_profits_USD == [3.1, 3.2]
+
+    # test can call for metrics
+    assert hist_profits.have_data()
+
+    # test *recent* metrics
+    target_names = PROFIT_NAMES
+    values = hist_profits.recent_metrics_values()
+    assert sorted(values.keys()) == sorted(target_names)
+    assert values == {"pdr_profit_OCEAN": 2.2, "trader_profit_USD": 3.2}
+
+    # test *final* metrics
+    values = hist_profits.final_metrics_values()
+    assert sorted(values.keys()) == sorted(target_names)
+    assert values == {
+        "pdr_profit_OCEAN": np.sum([2.1, 2.2]),
+        "trader_profit_USD": np.sum([3.1, 3.2]),
+    }
+
+
+# =============================================================================
+# test SimState
+
+
+@enforce_types
+def test_sim_state__basic_init():
+    # set data
+    st = SimState()
+
+    # test empty raw state
+    assert st.iter_number == 0
+    assert st.binmodel_data is None
+    assert st.binmodel is None
+    assert isinstance(st.true_vs_pred[UP], TrueVsPred)
+    assert isinstance(st.true_vs_pred[DOWN], TrueVsPred)
+    assert isinstance(st.hist_perfs[UP], HistPerfs)
+    assert isinstance(st.hist_perfs[DOWN], HistPerfs)
+    assert isinstance(st.hist_profits, HistProfits)
+
+    # test names
+    target_names = _target_state_names()
+    assert len(target_names) == 7 * 2 + 2
+    names = SimState.metrics_names()
+    assert names == target_names
+
+    # test can't call for metrics
+    assert not st.have_data()
+    with pytest.raises(AssertionError):
+        _ = st.recent_metrics_values()
+    with pytest.raises(AssertionError):
+        _ = st.final_metrics_values()
+
+
+@enforce_types
+def test_sim_state__init_loop_attributes():
+    # init
+    st = SimState()
+
+    # change after init
+    st.iter_number = 1
+    st.binmodel = "foo"
+
+    # should go back to init state
+    st.init_loop_attributes()
+
+    # check
+    assert st.iter_number == 0
+    assert st.binmodel is None
+
+
+@enforce_types
+def test_sim_state__main():
+    st = SimState()
+    target_names = _target_state_names()
+
+    # update
+    trueval = {UP: True, DOWN: False}
+    predprob = {UP: 0.6, DOWN: 0.3}
+
+    st.update(trueval, predprob, pdr_profit_OCEAN=1.4, trader_profit_USD=1.5)
+    st.update(trueval, predprob, pdr_profit_OCEAN=2.4, trader_profit_USD=2.5)
+
+    # test raw state -- true_vs_pred
+    assert st.true_vs_pred[UP].truevals == [True, True]
+    assert st.true_vs_pred[UP].predprobs == [0.6, 0.6]
+    assert st.true_vs_pred[DOWN].truevals == [False, False]
+    assert st.true_vs_pred[DOWN].predprobs == [0.3, 0.3]
+
+    # test *recent* metrics
+    values = st.recent_metrics_values()
+    assert len(values) == 7 * 2 + 2
+    assert sorted(values.keys()) == sorted(target_names)
+    for name, val in values.items():
+        if name == "pdr_profit_OCEAN":
+            assert val == 2.4
+        elif name == "trader_profit_USD":
+            assert val == 2.5
+        elif "loss" in name:
+            assert 0.0 <= val <= 3.0
+        else:  # hist_perfs value
+            assert 0.0 <= val <= 1.0, (name, val)
+
+    # test *final* metrics
+    values = st.final_metrics_values()
+    assert sorted(values.keys()) == sorted(target_names)
+    for name, val in values.items():
+        if name == "pdr_profit_OCEAN":
+            assert val == np.sum([1.4, 2.4])
+        elif name == "trader_profit_USD":
+            assert val == np.sum([1.5, 2.5])
+        elif "loss" in name:
+            assert 0.0 <= val <= 3.0
+        else:  # hist_perfs value
+            assert 0.0 <= val <= 1.0, (name, val)
+
+
+@enforce_types
+def _target_state_names():
+    return [
+        f"{name}_{dirn_str(dirn)}" for dirn in [UP, DOWN] for name in PERF_NAMES
+    ] + PROFIT_NAMES
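The asserts above fix an asymmetry worth calling out: recent_metrics_values returns the last value of every series, while final_metrics_values keeps the last accuracy triplet but averages f1/precision/recall/loss over the run, and (per the HistProfits tests) final profits are cumulative sums rather than averages. A condensed sketch consistent with those asserts follows; HistPerfsSketch is a hypothetical stand-in for the real class in pdr_backend/sim/sim_state.py.

import numpy as np

from pdr_backend.aimodel.true_vs_pred import PERF_NAMES  # acc_est .. loss, 7 names

class HistPerfsSketch:
    # Hypothetical condensed stand-in for HistPerfs, shaped only by the
    # asserts in test_hist_perfs__main; the real class may differ.
    def __init__(self, dirn_s: str):
        self.dirn_s = dirn_s  # direction suffix, e.g. from dirn_str(UP)
        self.series: dict = {name: [] for name in PERF_NAMES}

    def update(self, perfs_list: list) -> None:
        # perfs_list order matches PERF_NAMES: acc_est, acc_l, acc_u, f1, ...
        for name, val in zip(PERF_NAMES, perfs_list):
            self.series[name].append(val)

    def have_data(self) -> bool:
        return bool(self.series[PERF_NAMES[0]])

    def recent_metrics_values(self) -> dict:
        assert self.have_data()
        return {f"{n}_{self.dirn_s}": s[-1] for n, s in self.series.items()}

    def final_metrics_values(self) -> dict:
        assert self.have_data()
        # last value for the accuracy triplet; mean over the run otherwise
        return {
            f"{n}_{self.dirn_s}": s[-1] if n.startswith("acc_") else np.mean(s)
            for n, s in self.series.items()
        }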
"trader_profit_USD": 3.2} + + # test *final* metrics + values = hist_profits.final_metrics_values() + assert sorted(values.keys()) == sorted(target_names) + assert values == { + "pdr_profit_OCEAN": np.sum([2.1, 2.2]), + "trader_profit_USD": np.sum([3.1, 3.2]), + } + + +# ============================================================================= +# test SimState + + +@enforce_types +def test_sim_state__basic_init(): + # set data + st = SimState() + + # test empty raw state + assert st.iter_number == 0 + assert st.binmodel_data is None + assert st.binmodel is None + assert isinstance(st.true_vs_pred[UP], TrueVsPred) + assert isinstance(st.true_vs_pred[DOWN], TrueVsPred) + assert isinstance(st.hist_perfs[UP], HistPerfs) + assert isinstance(st.hist_perfs[DOWN], HistPerfs) + assert isinstance(st.hist_profits, HistProfits) + + # test names + target_names = _target_state_names() + assert len(target_names) == 7 * 2 + 2 + names = SimState.metrics_names() + assert names == target_names + + # test can't call for metrics + assert not st.have_data() + with pytest.raises(AssertionError): + _ = st.recent_metrics_values() + with pytest.raises(AssertionError): + _ = st.final_metrics_values() + + +@enforce_types +def test_sim_state__init_loop_attributes(): + # init + st = SimState() + + # change after init + st.iter_number = 1 + st.binmodel = "foo" + + # should go back to init state + st.init_loop_attributes() + + # check + assert st.iter_number == 0 + assert st.binmodel is None + + +@enforce_types +def test_sim_state__main(): + st = SimState() + target_names = _target_state_names() + + # update + trueval = {UP: True, DOWN: False} + predprob = {UP: 0.6, DOWN: 0.3} + + st.update(trueval, predprob, pdr_profit_OCEAN=1.4, trader_profit_USD=1.5) + st.update(trueval, predprob, pdr_profit_OCEAN=2.4, trader_profit_USD=2.5) + + # test raw state -- true_vs_pred + assert st.true_vs_pred[UP].truevals == [True, True] + assert st.true_vs_pred[UP].predprobs == [0.6, 0.6] + assert st.true_vs_pred[DOWN].truevals == [False, False] + assert st.true_vs_pred[DOWN].predprobs == [0.3, 0.3] + + # test *recent* metrics + values = st.recent_metrics_values() + assert len(values) == 7 * 2 + 2 + assert sorted(values.keys()) == sorted(target_names) + for name, val in values.items(): + if name == "pdr_profit_OCEAN": + assert val == 2.4 + elif name == "trader_profit_USD": + assert val == 2.5 + elif "loss" in name: + assert 0.0 <= val <= 3.0 + else: # hist_perfs value + assert 0.0 <= val <= 1.0, (name, val) + + # test *final* metrics + values = st.final_metrics_values() + assert sorted(values.keys()) == sorted(target_names) + for name, val in values.items(): + if name == "pdr_profit_OCEAN": + assert val == np.sum([1.4, 2.4]) + elif name == "trader_profit_USD": + assert val == np.sum([1.5, 2.5]) + elif "loss" in name: + assert 0.0 <= val <= 3.0 + else: # hist_perfs value + assert 0.0 <= val <= 1.0, (name, val) + + +@enforce_types +def _target_state_names(): + return [ + f"{name}_{dirn_str(dirn)}" for dirn in [UP, DOWN] for name in PERF_NAMES + ] + PROFIT_NAMES diff --git a/pdr_backend/sim/test/test_sim_trader.py b/pdr_backend/sim/test/test_sim_trader.py index 0a4b68f9b..ea3be1cda 100644 --- a/pdr_backend/sim/test/test_sim_trader.py +++ b/pdr_backend/sim/test/test_sim_trader.py @@ -6,16 +6,16 @@ from unittest.mock import Mock - +from enforce_typing import enforce_types import pytest - from pdr_backend.ppss.exchange_mgr_ss import ExchangeMgrSS from pdr_backend.sim.sim_trader import SimTrader FEE_PERCENT = 0.01 +@enforce_types 
diff --git a/ppss.yaml b/ppss.yaml
index e5bc60143..9a18898c4 100644
--- a/ppss.yaml
+++ b/ppss.yaml
@@ -2,12 +2,11 @@
 ## Copyright 2024 Ocean Protocol Foundation
 ## SPDX-License-Identifier: Apache-2.0
 ##
-
 # (web3_pp / network settings is at bottom, because it's long)
 lake_ss:
   lake_dir: lake_data
   feeds:
-    - binance BTC/USDT ETH/USDT 5m
+    - binance BTC/USDT 5m
     # - binance BTC/USDT ETH/USDT BNB/USDT XRP/USDT ADA/USDT DOGE/USDT SOL/USDT LTC/USDT TRX/USDT DOT/USDT 5m
     # - kraken BTC/USDT 5m
   st_timestr: 30 days ago # starting date for data
@@ -34,14 +33,15 @@ predictoor_ss:
   aimodel_data_ss: # used by AimodelDataFactory
     max_n_train: 1000 # no. epochs to train model on
-    autoregressive_n: 2 # no. epochs that model looks back, to predict next
+    autoregressive_n: 1 # no. epochs that model looks back, to predict next
+    class_thr: 0.002 # 0.05 = 5%. 0.0001=0.01%. UP class needs >this, DOWN class needs <-this
     transform: None # None -> raw value of signal; RelDiff -> rel % diff
 
   aimodel_ss: # used by AimodelFactory
     approach: ClassifLinearRidge # ClassifLinearLasso | ClassifLinearLasso_Balanced | ClassifLinearRidge | ClassifLinearRidge_Balanced | ClassifLinearElasticNet | ClassifLinearElasticNet_Balanced | ClassifLinearSVC | ClassifGaussianProcess | ClassifXgboost | ClassifConstant | RegrLinearLS | RegrLinearLasso | RegrLinearRidge | RegrLinearElasticNet | RegrGaussianProcess | RegrXgboost | RegrConstant
-    weight_recent: 10x_5x # 10x_5x | 10000x | None
+    weight_recent: None # 10x_5x | 10000x | None
     balance_classes: None # SMOTE | RandomOverSampler | None
-    calibrate_probs: CalibratedClassifierCV_Sigmoid # CalibratedClassifierCV_Sigmoid | CalibratedClassifierCV_Isotonic | None
+    calibrate_probs: None # CalibratedClassifierCV_Sigmoid | CalibratedClassifierCV_Isotonic | None
     calibrate_regr: CurrentYval # CurrentYval | None
     train_every_n_epochs: 1
     calc_imps: True
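The new class_thr knob deserves a short illustration. Assuming it thresholds the relative price change, as its comment suggests ("UP class needs >this, DOWN class needs <-this"), a hypothetical labeling function would behave as follows; the real labeling lives in the binmodel data factory and may differ in detail.

def classify(prev_close: float, close: float, class_thr: float) -> dict:
    # Hypothetical illustration of class_thr = 0.002 (i.e. 0.2%): label UP
    # when the relative change exceeds +class_thr, DOWN when it falls
    # below -class_thr. Moves inside the dead zone land in neither class.
    rel_change = (close - prev_close) / prev_close
    return {"UP": rel_change > class_thr, "DOWN": rel_change < -class_thr}

assert classify(100.0, 100.5, 0.002) == {"UP": True, "DOWN": False}
assert classify(100.0, 99.5, 0.002) == {"UP": False, "DOWN": True}
assert classify(100.0, 100.1, 0.002) == {"UP": False, "DOWN": False}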