Fix #1278, round 2: [Sim] Reframe to classify 'will price go up *anytime* in next 5 min?' #1281

Open

wants to merge 58 commits into base: main

Commits (58)
d0fe232
Fix #1278:[Sim] Experiment: Reframe to classify 'will price go up *an…
trentmc Jun 22, 2024
0eb7bc9
tweaks
trentmc Jun 22, 2024
f4b1775
wip
trentmc Jun 22, 2024
24a739e
make linters happy
trentmc Jun 22, 2024
b288253
undo
trentmc Jun 22, 2024
e475ee2
Make UP and DOWN classifier models (and related data) much more expli…
trentmc Jun 23, 2024
e2f9ae2
Improve predictoor logic: only stake when models are confident enough
trentmc Jun 23, 2024
fbaa8c3
bug fix
trentmc Jun 23, 2024
a03244e
bug fix 2
trentmc Jun 23, 2024
dbe344c
use high/low values for X
trizin Jun 24, 2024
d3ea155
Big refactor: mostly done first cut of writing (incl tests), but have…
trentmc Jun 26, 2024
0cfa982
wip getting tests to work, and adding tests
trentmc Jul 1, 2024
928d740
wip
trentmc Jul 2, 2024
c9ad2fb
many more tests passing, including all of test_sim_state.py
trentmc Jul 3, 2024
c73deb1
fixing many bugs
trentmc Jul 3, 2024
96a16d0
many bug fixes
trentmc Jul 3, 2024
81078f6
test_sim_engine.py working
trentmc Jul 3, 2024
0f7e1a9
more bug fixes
trentmc Jul 3, 2024
caf5cbf
deprecate residuals plots
trentmc Jul 3, 2024
c442a59
black
trentmc Jul 3, 2024
4e167b3
black & pylint happy
trentmc Jul 3, 2024
b8fed96
all linters happy
trentmc Jul 3, 2024
58bdebb
tweak
trentmc Jul 3, 2024
220e06e
plots are running end-to-end
trentmc Jul 3, 2024
253fa8b
show both UP and DOWN model performances
trentmc Jul 3, 2024
91b4f51
add UP and down for pdr profit dist'n vs prob. Other plot tweaks
trentmc Jul 4, 2024
b762641
better format plots
trentmc Jul 4, 2024
0c1a800
add UP and DOWN for trader profit dist'n
trentmc Jul 4, 2024
bad97b3
Simpler code for pdr & trader profit dist'n
trentmc Jul 4, 2024
d65ec0e
add UP and DOWN plots for model responses
trentmc Jul 4, 2024
23eef88
tweak
trentmc Jul 4, 2024
9675765
selecting vars works for UP and DOWN model response plots
trentmc Jul 4, 2024
9447627
black
trentmc Jul 4, 2024
42fbe76
sim unit tests passing
trentmc Jul 4, 2024
3547ef7
all linters pass
trentmc Jul 4, 2024
5e382d8
bug fix
trentmc Jul 4, 2024
862c366
move calc_pdr_profit()
trentmc Jul 4, 2024
1651e94
bug fixes, and more enforcement
trentmc Jul 4, 2024
2960286
lower log loss if constant
trentmc Jul 4, 2024
88f950e
git merge main, minus most of https://github.com/oceanprotocol/pdr-ba…
trentmc Jul 4, 2024
6ceb3f0
git merge main
trentmc Jul 5, 2024
2018cc9
Merge branch 'main' into issue1278-reframe2
trentmc Jul 5, 2024
1109246
git merge main
trentmc Jul 5, 2024
c297942
Merge branch 'main' into issue1278-reframe2
trentmc Jul 5, 2024
b028062
sync trader.md
trentmc Jul 5, 2024
88389df
trader 2
trentmc Jul 5, 2024
67f6156
aimodel data factory
trentmc Jul 5, 2024
6bef2a0
remove variant_close() etc
trentmc Jul 5, 2024
a4ff665
Merge branch 'main' into issue1278-reframe2
trentmc Jul 5, 2024
6d73ff5
Merge branch 'issue1278-reframe2' of https://github.com/oceanprotocol…
trentmc Jul 5, 2024
bd84539
big rename: sim_model -> grpmodel
trentmc Jul 6, 2024
11c3e67
rename grp -> bin
trentmc Jul 6, 2024
b816e18
re-order imports alphabetically
trentmc Jul 6, 2024
7f8ef72
bug fixes
trentmc Jul 6, 2024
992a588
git merge main
trentmc Jul 6, 2024
4a8b291
simplify, and avoid TypeError / DeprecationWarning
trentmc Jul 6, 2024
39c1e30
BIG bug fix. How: more thorough test_binmodel_data_factory.py -> led …
trentmc Jul 6, 2024
2f662df
update ppss.yaml
trentmc Jul 6, 2024
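The reframed target in this PR's title — "will price go up *anytime* in next 5 min?" — is a binary classification label per epoch rather than a point price prediction. The sketch below only illustrates that labeling idea and is not code from this PR; the function name, the use of the `high` series, and the horizon handling are assumptions.

```python
import numpy as np

def label_up_anytime(close: np.ndarray, high: np.ndarray, horizon: int = 5) -> np.ndarray:
    """Hypothetical sketch: label epoch t True if price exceeds close[t]
    at any point within the next `horizon` candles (using the candles' highs)."""
    n = len(close)
    labels = np.zeros(n - horizon, dtype=bool)
    for t in range(n - horizon):
        labels[t] = np.max(high[t + 1 : t + 1 + horizon]) > close[t]
    return labels

# toy series: price pops once early, then drifts down
close = np.array([10.0, 10.5, 10.2, 9.8, 9.5, 9.0, 8.8])
high = np.array([10.6, 10.7, 10.3, 9.9, 9.6, 9.1, 8.9])
print(label_up_anytime(close, high, horizon=2))  # [ True False False False False]
```

The commit "use high/low values for X" suggests high/low data also enters on the feature side; the labeling above is purely illustrative.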
16 changes: 14 additions & 2 deletions pdr_backend/aimodel/aimodel_data_factory.py
@@ -57,6 +57,10 @@ class AimodelDataFactory:
def __init__(self, ss: PredictoorSS):
self.ss = ss

@staticmethod
def testshift(test_n: int, test_i: int) -> int:
return test_n - test_i - 1

def create_xy(
self,
mergedohlcv_df: pl.DataFrame,
@@ -156,10 +160,12 @@ def create_xy(
assert len(x_list) == len(xrecent_list) == len(xcol_list)
x_df = pd.concat(x_list, keys=xcol_list, axis=1)
xrecent_df = pd.concat(xrecent_list, keys=xcol_list, axis=1)
assert x_df.shape[0] == N_train + 1 # the +1 is for test

# convert x dfs to numpy arrays
X = x_df.to_numpy()
xrecent = xrecent_df.to_numpy()[0, :]
assert X.shape[0] == N_train + 1 # the +1 is for test

# y is set from yval_{exch_str, signal_str, pair_str}
hist_col = hist_col_name(predict_feed)
@@ -215,6 +221,12 @@ def _slice(x: list, st: int, fin: int) -> list:
assert st < 0
assert fin <= 0
assert st < fin
+   assert abs(st) <= len(x), f"st is out of bounds. st={st}, len(x)={len(x)}"
+
    if fin == 0:
-       slicex = x[st:]
-   else:
-       slicex = x[st:fin]
-
-   assert len(slicex) == fin - st, (len(slicex), fin - st, st, fin)
-   return slicex
+       return x[st:]
+   return x[st:fin]
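The new `testshift` helper added near the top of this file maps a test-point index to a backward shift of the data; the intent appears to be that the oldest test point uses the largest shift and the most recent uses shift 0. A quick, self-contained check of the arithmetic (the index values are illustrative):

```python
def testshift(test_n: int, test_i: int) -> int:
    # same formula as AimodelDataFactory.testshift in the diff above
    return test_n - test_i - 1

# with 10 test points: oldest point (i=0) -> shift 9; newest (i=9) -> shift 0
assert [testshift(10, i) for i in (0, 5, 9)] == [9, 4, 0]
```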
5 changes: 5 additions & 0 deletions pdr_backend/aimodel/test/test_aimodel_data_factory_main.py
@@ -109,12 +109,17 @@ def test_create_xy_notransform__1exchange_1coin_1signal():
]
d = predictoor_ss_test_dict(feedset_list=feedset_list)
predictoor_ss = PredictoorSS(d)
predictoor_ss.aimodel_data_ss.set_max_n_train(7)
predictoor_ss.aimodel_data_ss.set_autoregressive_n(3)

predict_feed = predictoor_ss.predict_train_feedsets[0].predict
train_feeds = predictoor_ss.predict_train_feedsets[0].train_on
aimodel_data_factory = AimodelDataFactory(predictoor_ss)
mergedohlcv_df = merge_rawohlcv_dfs(ETHUSDT_RAWOHLCV_DFS)

# =========== have testshift = 0
# no. rows of X = len(y) = max_n_train + max_n_test(=1) = 7 + 1 = 8
# no. cols of X = autoregressive_n * num_signals = 3 * 1 = 3
target_X = np.array(
[
[11.0, 10.0, 9.0], # oldest
40 changes: 40 additions & 0 deletions pdr_backend/aimodel/test/test_slice.py
@@ -0,0 +1,40 @@
from enforce_typing import enforce_types
import pytest

from pdr_backend.aimodel.aimodel_data_factory import _slice


@enforce_types
def test_slice__happy_path():
x = [1, 2, 3, 4, 5, 6, 7]
assert _slice(x=x, st=-2, fin=0) == [6, 7]
assert _slice(x=x, st=-3, fin=-1) == [5, 6]
assert _slice(x=x, st=-7, fin=-5) == [1, 2]
assert _slice(x=x, st=-7, fin=-6) == [1]
assert _slice(x=x, st=-7, fin=0) == x
assert _slice(x=[1], st=-1, fin=0) == [1]


@enforce_types
def test_slice__unhappy_path():
# need st < 0
with pytest.raises(AssertionError):
_ = _slice(x=[1, 2, 3], st=0, fin=-2)

# need fin <= 0
with pytest.raises(AssertionError):
_ = _slice(x=[1, 2, 3], st=-2, fin=1)

# need st < fin
with pytest.raises(AssertionError):
_ = _slice(x=[1, 2, 3], st=-4, fin=-4)

with pytest.raises(AssertionError):
_ = _slice(x=[1, 2, 3], st=-4, fin=-5)

# st out of bounds
with pytest.raises(AssertionError):
_slice(x=[1, 2, 3, 4, 5, 6, 7], st=-8, fin=-5)

with pytest.raises(AssertionError):
_slice(x=[], st=-1, fin=0)
74 changes: 74 additions & 0 deletions pdr_backend/aimodel/test/test_true_vs_pred.py
@@ -0,0 +1,74 @@
from enforce_typing import enforce_types
from pytest import approx

from pdr_backend.aimodel.true_vs_pred import PERF_NAMES, TrueVsPred


# pylint: disable=too-many-statements
@enforce_types
def test_true_vs_pred():
d = TrueVsPred()
assert d.truevals == []
assert d.predprobs == []
assert d.predvals == []
assert d.n_correct == 0
assert d.n_trials == 0

# true = up, guess = up (correct guess)
d.update(trueval=True, predprob=0.6)
assert d.truevals == [True]
assert d.predprobs == [0.6]
assert d.predvals == [True]
assert d.n_correct == 1
assert d.n_trials == 1
assert len(d.accuracy()) == 3
assert d.accuracy()[0] == 1.0 / 1.0

# true = down, guess = down (correct guess)
d.update(trueval=False, predprob=0.3)
assert d.truevals == [True, False]
assert d.predprobs == [0.6, 0.3]
assert d.predvals == [True, False]
assert d.n_correct == 2
assert d.n_trials == 2
assert d.accuracy()[0] == 2.0 / 2.0

# true = up, guess = down (incorrect guess)
d.update(trueval=True, predprob=0.4)
assert d.truevals == [True, False, True]
assert d.predprobs == [0.6, 0.3, 0.4]
assert d.predvals == [True, False, False]
assert d.n_correct == 2
assert d.n_trials == 3
assert d.accuracy()[0] == approx(2.0 / 3.0)

# true = down, guess = up (incorrect guess)
d.update(trueval=False, predprob=0.7)
assert d.truevals == [True, False, True, False]
assert d.predprobs == [0.6, 0.3, 0.4, 0.7]
assert d.predvals == [True, False, False, True]
assert d.n_correct == 2
assert d.n_trials == 4
assert d.accuracy()[0] == approx(2.0 / 4.0)

# test performance values
(acc_est, acc_l, acc_u) = d.accuracy()
assert acc_est == approx(0.5)
assert acc_l == approx(0.010009003864986377)
assert acc_u == approx(0.9899909961350136)

(precision, recall, f1) = d.precision_recall_f1()
assert precision == approx(0.5)
assert recall == approx(0.5)
assert f1 == approx(0.5)

loss = d.log_loss()
assert loss == approx(0.7469410259762035)

assert d.perf_names() == PERF_NAMES
assert len(d.perf_values()) == len(PERF_NAMES)

target_values = [acc_est, acc_l, acc_u, precision, recall, f1, loss]
values = d.perf_values()
for val, target_val in zip(values, target_values):
assert val == approx(target_val)
76 changes: 76 additions & 0 deletions pdr_backend/aimodel/true_vs_pred.py
@@ -0,0 +1,76 @@
from typing import List, Tuple

from enforce_typing import enforce_types

from sklearn.metrics import log_loss, precision_recall_fscore_support
from statsmodels.stats.proportion import proportion_confint

PERF_NAMES = ["acc_est", "acc_l", "acc_u", "f1", "precision", "recall", "loss"]

LOG_LOSS_ON_CONSTANT = 0.5


class TrueVsPred:
"""
True vs pred vals for a single aimodel, or for a history of models,
+ the performances that derive from true vs pred value info
"""

@enforce_types
def __init__(self):
# 'i' is iteration number i
self.truevals: List[bool] = [] # [i] : true value
self.predprobs: List[float] = [] # [i] : model's pred. prob.

@enforce_types
def update(self, trueval: bool, predprob: float):
self.truevals.append(trueval)
self.predprobs.append(predprob)

@property
def predvals(self) -> List[bool]:
"""@return [i] : model pred. value"""
return [p > 0.5 for p in self.predprobs]

@property
def n_correct(self) -> int:
return sum(tv == pv for tv, pv in zip(self.truevals, self.predvals))

@property
def n_trials(self) -> int:
return len(self.truevals)

@enforce_types
def accuracy(self) -> Tuple[float, float, float]:
n_correct, n_trials = self.n_correct, self.n_trials
acc_est = n_correct / n_trials
acc_l, acc_u = proportion_confint(count=n_correct, nobs=n_trials)
return (acc_est, acc_l, acc_u)

@enforce_types
def precision_recall_f1(self) -> Tuple[float, float, float]:
(precision, recall, f1, _) = precision_recall_fscore_support(
self.truevals,
self.predvals,
average="binary",
zero_division=0.0,
)
return (precision, recall, f1)

@enforce_types
def log_loss(self) -> float:
if min(self.truevals) == max(self.truevals):
return LOG_LOSS_ON_CONSTANT
return log_loss(self.truevals, self.predprobs)

@enforce_types
def perf_names(self) -> List[str]:
return PERF_NAMES

@enforce_types
def perf_values(self) -> List[float]:
perfs_list = (
list(self.accuracy()) + list(self.precision_recall_f1()) + [self.log_loss()]
)
assert len(perfs_list) == len(PERF_NAMES)
return perfs_list
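`accuracy()` gets its lower/upper bounds from statsmodels' `proportion_confint`. Assuming the library defaults (95% interval, normal approximation), 2 correct out of 4 trials gives roughly (0.01, 0.99) — the same values asserted in `test_true_vs_pred` above:

```python
from statsmodels.stats.proportion import proportion_confint

# p_hat = 2/4 = 0.5; half-width ~= 1.96 * sqrt(0.5 * 0.5 / 4) ~= 0.49
acc_l, acc_u = proportion_confint(count=2, nobs=4)
print(acc_l, acc_u)  # ~0.0100, ~0.9899
```

With so few trials the interval spans nearly all of [0, 1]; the bounds only become informative once the sim has accumulated a meaningful history of predictions.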
32 changes: 32 additions & 0 deletions pdr_backend/binmodel/binmodel.py
@@ -0,0 +1,32 @@
from enforce_typing import enforce_types

from pdr_backend.aimodel.aimodel import Aimodel
from pdr_backend.binmodel.constants import UP, DOWN


@enforce_types
class Binmodel(dict):

def __init__(self, model_UP: Aimodel, model_DOWN: Aimodel):
self[UP] = model_UP
self[DOWN] = model_DOWN

def predict_next(self, X_test: dict) -> dict:
"""
@arguments
X_test -- dict of {UP: X_test_UP_array, DOWN: X_test_DOWN_arr.}
It expects each array to have exactly 1 row, to predict from

@return
predprob -- dict of {UP: predprob_UP_float, DOWN: predprob_DOWN_float}
"""
assert X_test[UP].shape[0] == 1
assert X_test[DOWN].shape[0] == 1

prob_UP = self[UP].predict_ptrue(X_test[UP])[0]
prob_DOWN = self[DOWN].predict_ptrue(X_test[DOWN])[0]

# ensure not np.float64. Why: applying ">" gives np.bool --> problems
prob_UP, prob_DOWN = float(prob_UP), float(prob_DOWN)

return {UP: prob_UP, DOWN: prob_DOWN}
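A minimal sketch of the calling pattern `predict_next` expects: one single-row feature matrix per direction in, one probability per direction out. The stub class and the string stand-ins for `UP`/`DOWN` below are hypothetical — only `predict_ptrue()` matters, mirroring what `Aimodel` provides here.

```python
import numpy as np

UP, DOWN = "UP", "DOWN"  # stand-ins for the constants in binmodel/constants.py

class StubModel:
    """Hypothetical stand-in for Aimodel; exposes only predict_ptrue()."""
    def __init__(self, ptrue: float):
        self.ptrue = ptrue

    def predict_ptrue(self, X: np.ndarray) -> np.ndarray:
        return np.full(X.shape[0], self.ptrue)

# mirror of Binmodel.predict_next(): one-row X per direction -> one prob per direction
models = {UP: StubModel(0.7), DOWN: StubModel(0.2)}
X_test = {UP: np.zeros((1, 3)), DOWN: np.zeros((1, 3))}
probs = {d: float(models[d].predict_ptrue(X_test[d])[0]) for d in (UP, DOWN)}
print(probs)  # {'UP': 0.7, 'DOWN': 0.2}
```

Note the explicit `float(...)` cast, matching the comment in `predict_next` about avoiding `np.float64` (and the numpy bool that `>` would then produce).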
59 changes: 59 additions & 0 deletions pdr_backend/binmodel/binmodel_data.py
@@ -0,0 +1,59 @@
from typing import Dict, List

from enforce_typing import enforce_types
import numpy as np

from pdr_backend.binmodel.constants import Dirn, UP, DOWN


class BinmodelData1Dir:

@enforce_types
def __init__(
self,
X: np.ndarray,
ytrue: np.ndarray,
colnames: List[str],
):
assert len(X.shape) == 2
assert len(ytrue.shape) == 1
assert X.shape[0] == ytrue.shape[0], (X.shape[0], ytrue.shape[0])

self.X: np.ndarray = X
self.ytrue: np.ndarray = ytrue
self.colnames: List[str] = colnames

@property
def st(self) -> int:
return 0

@property
def fin(self) -> int:
return self.X.shape[0] - 1

@property
def X_train(self) -> np.ndarray:
return self.X[self.st : self.fin, :]

@property
def ytrue_train(self) -> np.ndarray:
return self.ytrue[self.st : self.fin]

@property
def X_test(self) -> np.ndarray:
return self.X[self.fin : self.fin + 1, :]


class BinmodelData(dict):
@enforce_types
def __init__(
self,
data_UP: BinmodelData1Dir,
data_DOWN: BinmodelData1Dir,
):
self[UP] = data_UP
self[DOWN] = data_DOWN

@property
def X_test(self) -> Dict[Dirn, np.ndarray]:
return {UP: self[UP].X_test, DOWN: self[DOWN].X_test}
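To make the split semantics concrete: `st`/`fin` carve the last row of `X` off as the one-row test matrix that `Binmodel.predict_next()` expects, with everything before it used for training. A small sketch (the toy arrays are made up, and it assumes this branch's `pdr_backend.binmodel` module is importable):

```python
import numpy as np

from pdr_backend.binmodel.binmodel_data import BinmodelData1Dir

# 3 samples x 2 features; the last row becomes the one-row test matrix
X = np.array([[1.0, 2.0],
              [3.0, 4.0],
              [5.0, 6.0]])
ytrue = np.array([True, False, True])

data = BinmodelData1Dir(X=X, ytrue=ytrue, colnames=["f0", "f1"])
assert data.X_train.shape == (2, 2)   # rows 0..fin-1 are train
assert data.ytrue_train.shape == (2,)
assert data.X_test.shape == (1, 2)    # last row, kept 2-D for predict_ptrue()
```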