I have added 2 other algorithms to this ensemble strategy but I am getting an error #55

Open
Rhea1918 opened this issue Aug 10, 2022 · 0 comments


```python
# DRL models from Stable Baselines 3
from __future__ import annotations

import time

import numpy as np
import pandas as pd
from stable_baselines3 import A2C
from stable_baselines3 import DDPG
from stable_baselines3 import PPO
from stable_baselines3 import SAC
from stable_baselines3 import TD3
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise
from stable_baselines3.common.vec_env import DummyVecEnv

from finrl import config
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.meta.preprocessor.preprocessors import data_split

MODELS = {"a2c": A2C, "ddpg": DDPG, "td3": TD3, "sac": SAC, "ppo": PPO}

MODEL_KWARGS = {x: config.__dict__[f"{x.upper()}_PARAMS"] for x in MODELS.keys()}

NOISE = {
"normal": NormalActionNoise,
"ornstein_uhlenbeck": OrnsteinUhlenbeckActionNoise,
}
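
# MODEL_KWARGS looks up the default hyper-parameter dict for each algorithm in
# finrl.config (A2C_PARAMS, PPO_PARAMS, DDPG_PARAMS, SAC_PARAMS, TD3_PARAMS),
# keyed by the lower-case model name, so an added algorithm needs a matching
# *_PARAMS entry in config. NOISE maps the "action_noise" string that may
# appear in those kwargs to the corresponding Stable Baselines 3 noise class.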

class TensorboardCallback(BaseCallback):
"""
Custom callback for plotting additional values in tensorboard.
"""

def __init__(self, verbose=0):
    super().__init__(verbose)

def _on_step(self) -> bool:
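    # Log the per-step training reward to TensorBoard; the except branch falls
    # back to the "reward" key that some SB3 setups expose instead of "rewards".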
    try:
        self.logger.record(key="train/reward", value=self.locals["rewards"][0])
    except BaseException:
        self.logger.record(key="train/reward", value=self.locals["reward"][0])
    return True

class DRLAgent:
"""Provides implementations for DRL algorithms

Attributes
----------
    env: gym environment class
        user-defined class

Methods
-------
    get_model()
        setup DRL algorithms
    train_model()
        train DRL algorithms in a train dataset
        and output the trained model
    DRL_prediction()
        make a prediction in a test dataset and get results
"""

def __init__(self, env):
    self.env = env

def get_model(
    self,
    model_name,
    policy="MlpPolicy",
    policy_kwargs=None,
    model_kwargs=None,
    verbose=1,
    seed=None,
    tensorboard_log=None,
):
    if model_name not in MODELS:
        raise NotImplementedError("NotImplementedError")

    if model_kwargs is None:
        model_kwargs = MODEL_KWARGS[model_name]

    if "action_noise" in model_kwargs:
        n_actions = self.env.action_space.shape[-1]
        model_kwargs["action_noise"] = NOISE[model_kwargs["action_noise"]](
            mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions)
        )
    print(model_kwargs)
    return MODELS[model_name](
        policy=policy,
        env=self.env,
        tensorboard_log=tensorboard_log,
        verbose=verbose,
        policy_kwargs=policy_kwargs,
        seed=seed,
        **model_kwargs,
    )

def train_model(self, model, tb_log_name, total_timesteps=5000):
    model = model.learn(
        total_timesteps=total_timesteps,
        tb_log_name=tb_log_name,
        callback=TensorboardCallback(),
    )
    return model

@staticmethod
def DRL_prediction(model, environment, deterministic=True):
    test_env, test_obs = environment.get_sb_env()
    """make a prediction"""
    account_memory = []
    actions_memory = []
    #         state_memory=[] #add memory pool to store states
    test_env.reset()
    for i in range(len(environment.df.index.unique())):
        action, _states = model.predict(test_obs, deterministic=deterministic)
        # account_memory = test_env.env_method(method_name="save_asset_memory")
        # actions_memory = test_env.env_method(method_name="save_action_memory")
        test_obs, rewards, dones, info = test_env.step(action)
        if i == (len(environment.df.index.unique()) - 2):
            account_memory = test_env.env_method(method_name="save_asset_memory")
            actions_memory = test_env.env_method(method_name="save_action_memory")
        #                 state_memory=test_env.env_method(method_name="save_state_memory") # add current state to state memory
        if dones[0]:
            print("hit end!")
            break
    return account_memory[0], actions_memory[0]
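
# Illustrative call (names are placeholders):
#   df_account_value, df_actions = DRLAgent.DRL_prediction(trained_sac, e_trade_gym)
# where `e_trade_gym` is the StockTradingEnv built on the trade split; the
# method wraps it in a vectorized env itself via get_sb_env().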

@staticmethod
def DRL_prediction_load_from_file(model_name, environment, cwd, deterministic=True):
    if model_name not in MODELS:
        raise NotImplementedError("NotImplementedError")
    try:
        # load agent
        model = MODELS[model_name].load(cwd)
        print("Successfully load model", cwd)
    except BaseException:
        raise ValueError("Fail to load agent!")

    # test on the testing env
    state = environment.reset()
    episode_returns = []  # the cumulative_return / initial_account
    episode_total_assets = [environment.initial_total_asset]
    done = False
    while not done:
        action = model.predict(state, deterministic=deterministic)[0]
        state, reward, done, _ = environment.step(action)

        total_asset = (
            environment.amount
            + (environment.price_ary[environment.day] * environment.stocks).sum()
        )
        episode_total_assets.append(total_asset)
        episode_return = total_asset / environment.initial_total_asset
        episode_returns.append(episode_return)

    print("episode_return", episode_return)
    print("Test Finished!")
    return episode_total_assets

class DRLEnsembleAgent:
@staticmethod
def get_model(
model_name,
env,
policy="MlpPolicy",
policy_kwargs=None,
model_kwargs=None,
seed=None,
verbose=1,
):

    if model_name not in MODELS:
        raise NotImplementedError("NotImplementedError")

    if model_kwargs is None:
        temp_model_kwargs = MODEL_KWARGS[model_name]
    else:
        temp_model_kwargs = model_kwargs.copy()

    if "action_noise" in temp_model_kwargs:
        n_actions = env.action_space.shape[-1]
        temp_model_kwargs["action_noise"] = NOISE[
            temp_model_kwargs["action_noise"]
        ](mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))
    print(temp_model_kwargs)
    return MODELS[model_name](
        policy=policy,
        env=env,
        tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{model_name}",
        verbose=verbose,
        policy_kwargs=policy_kwargs,
        seed=seed,
        **temp_model_kwargs,
    )

@staticmethod
def train_model(model, model_name, tb_log_name, iter_num, total_timesteps=5000):
    model = model.learn(
        total_timesteps=total_timesteps,
        tb_log_name=tb_log_name,
        callback=TensorboardCallback(),
    )
    model.save(
        f"{config.TRAINED_MODEL_DIR}/{model_name.upper()}_{total_timesteps // 1000}k_{iter_num}"
    )
    return model

@staticmethod
def get_validation_sharpe(iteration, model_name):
    """Calculate Sharpe ratio based on validation results"""
    df_total_value = pd.read_csv(
        f"results/account_value_validation_{model_name}_{iteration}.csv"
    )
    # If the agent did not make any transaction
    if df_total_value["daily_return"].var() == 0:
        if df_total_value["daily_return"].mean() > 0:
            return np.inf
        else:
            return 0.0
    else:
        return (
            (7**0.5)
            * df_total_value["daily_return"].mean()
            / df_total_value["daily_return"].std()
        )

def __init__(
    self,
    df,
    train_period,
    val_test_period,
    rebalance_window,
    validation_window,
    stock_dim,
    hmax,
    initial_amount,
    buy_cost_pct,
    sell_cost_pct,
    reward_scaling,
    state_space,
    action_space,
    tech_indicator_list,
    print_verbosity,
):

    self.df = df
    self.train_period = train_period
    self.val_test_period = val_test_period

    self.unique_trade_date = df[
        (df.date > val_test_period[0]) & (df.date <= val_test_period[1])
    ].date.unique()
    self.rebalance_window = rebalance_window
    self.validation_window = validation_window

    self.stock_dim = stock_dim
    self.hmax = hmax
    self.initial_amount = initial_amount
    self.buy_cost_pct = buy_cost_pct
    self.sell_cost_pct = sell_cost_pct
    self.reward_scaling = reward_scaling
    self.state_space = state_space
    self.action_space = action_space
    self.tech_indicator_list = tech_indicator_list
    self.print_verbosity = print_verbosity

def DRL_validation(self, model, test_data, test_env, test_obs):
    """validation process"""
    for _ in range(len(test_data.index.unique())):
        action, _states = model.predict(test_obs)
        test_obs, rewards, dones, info = test_env.step(action)

def DRL_prediction(
    self, model, name, last_state, iter_num, turbulence_threshold, initial
):
    """make a prediction based on trained model"""

    ## trading env
    trade_data = data_split(
        self.df,
        start=self.unique_trade_date[iter_num - self.rebalance_window],
        end=self.unique_trade_date[iter_num],
    )
    trade_env = DummyVecEnv(
        [
            lambda: StockTradingEnv(
                df=trade_data,
                stock_dim=self.stock_dim,
                hmax=self.hmax,
                initial_amount=self.initial_amount,
                num_stock_shares=[0] * self.stock_dim,
                buy_cost_pct=[self.buy_cost_pct] * self.stock_dim,
                sell_cost_pct=[self.sell_cost_pct] * self.stock_dim,
                reward_scaling=self.reward_scaling,
                state_space=self.state_space,
                action_space=self.action_space,
                tech_indicator_list=self.tech_indicator_list,
                turbulence_threshold=turbulence_threshold,
                initial=initial,
                previous_state=last_state,
                model_name=name,
                mode="trade",
                iteration=iter_num,
                print_verbosity=self.print_verbosity,
            )
        ]
    )

    trade_obs = trade_env.reset()

    for i in range(len(trade_data.index.unique())):
        action, _states = model.predict(trade_obs)
        trade_obs, rewards, dones, info = trade_env.step(action)
        if i == (len(trade_data.index.unique()) - 2):
            # print(env_test.render())
            last_state = trade_env.render()

    df_last_state = pd.DataFrame({"last_state": last_state})
    df_last_state.to_csv(f"results/last_state_{name}_{i}.csv", index=False)
    return last_state

def run_ensemble_strategy(
    self, A2C_model_kwargs, PPO_model_kwargs, DDPG_model_kwargs, SAC_model_kwargs, TD3_model_kwargs, timesteps_dict
):
    """Ensemble Strategy that combines A2C, PPO, DDPG, SAC and TD3"""
    print("============Start Ensemble Strategy============")
    # for ensemble model, it's necessary to feed the last state
    # of the previous model to the current model as the initial state
    last_state_ensemble = []

    ppo_sharpe_list = []
    ddpg_sharpe_list = []
    a2c_sharpe_list = []
    td3_sharpe_list = []
    sac_sharpe_list = []

    model_use = []
    validation_start_date_list = []
    validation_end_date_list = []
    iteration_list = []

    insample_turbulence = self.df[
        (self.df.date < self.train_period[1])
        & (self.df.date >= self.train_period[0])
    ]
    insample_turbulence_threshold = np.quantile(
        insample_turbulence.turbulence.values, 0.90
    )

    start = time.time()
    for i in range(
        self.rebalance_window + self.validation_window,
        len(self.unique_trade_date),
        self.rebalance_window,
    ):
        validation_start_date = self.unique_trade_date[
            i - self.rebalance_window - self.validation_window
        ]
        validation_end_date = self.unique_trade_date[i - self.rebalance_window]

        validation_start_date_list.append(validation_start_date)
        validation_end_date_list.append(validation_end_date)
        iteration_list.append(i)

        print("============================================")
        ## initial state is empty
        if i - self.rebalance_window - self.validation_window == 0:
            # initial state
            initial = True
        else:
            # previous state
            initial = False

        # Tuning turbulence index based on historical data
        # Turbulence lookback window is one quarter (63 days)
        end_date_index = self.df.index[
            self.df["date"]
            == self.unique_trade_date[
                i - self.rebalance_window - self.validation_window
            ]
        ].to_list()[-1]
        start_date_index = end_date_index - 63 + 1

        historical_turbulence = self.df.iloc[
            start_date_index : (end_date_index + 1), :
        ]

        historical_turbulence = historical_turbulence.drop_duplicates(
            subset=["date"]
        )

        historical_turbulence_mean = np.mean(
            historical_turbulence.turbulence.values
        )

        # print(historical_turbulence_mean)

        if historical_turbulence_mean > insample_turbulence_threshold:
            # if the mean of the historical data is greater than the 90% quantile of insample turbulence data
            # then we assume that the current market is volatile,
            # therefore we set the 90% quantile of insample turbulence data as the turbulence threshold
            # meaning the current turbulence can't exceed the 90% quantile of insample turbulence data
            turbulence_threshold = insample_turbulence_threshold
        else:
            # if the mean of the historical data is less than the 90% quantile of insample turbulence data
            # then we tune up the turbulence_threshold, meaning we lower the risk
            turbulence_threshold = np.quantile(
                insample_turbulence.turbulence.values, 1
            )
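        # Note: the assignment below always overrides the threshold chosen
        # above with the 99% quantile of the in-sample turbulence values.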

        turbulence_threshold = np.quantile(
            insample_turbulence.turbulence.values, 0.99
        )
        print("turbulence_threshold: ", turbulence_threshold)

        ############## Environment Setup starts ##############
        ## training env
        train = data_split(
            self.df,
            start=self.train_period[0],
            end=self.unique_trade_date[
                i - self.rebalance_window - self.validation_window
            ],
        )
        self.train_env = DummyVecEnv(
            [
                lambda: StockTradingEnv(
                    df=train,
                    stock_dim=self.stock_dim,
                    hmax=self.hmax,
                    initial_amount=self.initial_amount,
                    num_stock_shares=[0] * self.stock_dim,
                    buy_cost_pct=[self.buy_cost_pct] * self.stock_dim,
                    sell_cost_pct=[self.sell_cost_pct] * self.stock_dim,
                    reward_scaling=self.reward_scaling,
                    state_space=self.state_space,
                    action_space=self.action_space,
                    tech_indicator_list=self.tech_indicator_list,
                    print_verbosity=self.print_verbosity,
                )
            ]
        )

        validation = data_split(
            self.df,
            start=self.unique_trade_date[
                i - self.rebalance_window - self.validation_window
            ],
            end=self.unique_trade_date[i - self.rebalance_window],
        )
        ############## Environment Setup ends ##############

        ############## Training and Validation starts ##############
        print(
            "======Model training from: ",
            self.train_period[0],
            "to ",
            self.unique_trade_date[
                i - self.rebalance_window - self.validation_window
            ],
        )
        # print("training: ",len(data_split(df, start=20090000, end=test.datadate.unique()[i-rebalance_window]) ))
        # print("==============Model Training===========")
        print("======A2C Training========")
        model_a2c = self.get_model(
            "a2c", self.train_env, policy="MlpPolicy", model_kwargs=A2C_model_kwargs
        )
        model_a2c = self.train_model(
            model_a2c,
            "a2c",
            tb_log_name=f"a2c_{i}",
            iter_num=i,
            total_timesteps=timesteps_dict["a2c"],
        )  # 100_000

        print(
            "======A2C Validation from: ",
            validation_start_date,
            "to ",
            validation_end_date,
        )
        val_env_a2c = DummyVecEnv(
            [
                lambda: StockTradingEnv(
                    df=validation,
                    stock_dim=self.stock_dim,
                    hmax=self.hmax,
                    initial_amount=self.initial_amount,
                    num_stock_shares=[0] * self.stock_dim,
                    buy_cost_pct=[self.buy_cost_pct] * self.stock_dim,
                    sell_cost_pct=[self.sell_cost_pct] * self.stock_dim,
                    reward_scaling=self.reward_scaling,
                    state_space=self.state_space,
                    action_space=self.action_space,
                    tech_indicator_list=self.tech_indicator_list,
                    turbulence_threshold=turbulence_threshold,
                    iteration=i,
                    model_name="A2C",
                    mode="validation",
                    print_verbosity=self.print_verbosity,
                )
            ]
        )
        val_obs_a2c = val_env_a2c.reset()
        self.DRL_validation(
            model=model_a2c,
            test_data=validation,
            test_env=val_env_a2c,
            test_obs=val_obs_a2c,
        )
        sharpe_a2c = self.get_validation_sharpe(i, model_name="A2C")
        print("A2C Sharpe Ratio: ", sharpe_a2c)

        print("======PPO Training========")
        model_ppo = self.get_model(
            "ppo", self.train_env, policy="MlpPolicy", model_kwargs=PPO_model_kwargs
        )
        model_ppo = self.train_model(
            model_ppo,
            "ppo",
            tb_log_name=f"ppo_{i}",
            iter_num=i,
            total_timesteps=timesteps_dict["ppo"],
        )  # 100_000
        print(
            "======PPO Validation from: ",
            validation_start_date,
            "to ",
            validation_end_date,
        )
        val_env_ppo = DummyVecEnv(
            [
                lambda: StockTradingEnv(
                    df=validation,
                    stock_dim=self.stock_dim,
                    hmax=self.hmax,
                    initial_amount=self.initial_amount,
                    num_stock_shares=[0] * self.stock_dim,
                    buy_cost_pct=[self.buy_cost_pct] * self.stock_dim,
                    sell_cost_pct=[self.sell_cost_pct] * self.stock_dim,
                    reward_scaling=self.reward_scaling,
                    state_space=self.state_space,
                    action_space=self.action_space,
                    tech_indicator_list=self.tech_indicator_list,
                    turbulence_threshold=turbulence_threshold,
                    iteration=i,
                    model_name="PPO",
                    mode="validation",
                    print_verbosity=self.print_verbosity,
                )
            ]
        )
        val_obs_ppo = val_env_ppo.reset()
        self.DRL_validation(
            model=model_ppo,
            test_data=validation,
            test_env=val_env_ppo,
            test_obs=val_obs_ppo,
        )
        sharpe_ppo = self.get_validation_sharpe(i, model_name="PPO")
        print("PPO Sharpe Ratio: ", sharpe_ppo)
        
        print("======SAC Training========")

model_sac = self.get_model(
"sac", self.train_env, policy="MlpPolicy", model_kwargs=SAC_model_kwargs
)
model_sac = self.train_model(
model_sac,
"sac",
tb_log_name=f"sac_{i}",
iter_num=i,
total_timesteps=timesteps_dict["sac"],
) # 100_000

        print(
            "======SAC Validation from: ",
            validation_start_date,
            "to ",
            validation_end_date,
        )
        val_env_sac = DummyVecEnv(
            [
                lambda: StockTradingEnv(
                    df=validation,
                    stock_dim=self.stock_dim,
                    hmax=self.hmax,
                    initial_amount=self.initial_amount,
                    num_stock_shares=[0] * self.stock_dim,
                    buy_cost_pct=[self.buy_cost_pct] * self.stock_dim,
                    sell_cost_pct=[self.sell_cost_pct] * self.stock_dim,
                    reward_scaling=self.reward_scaling,
                    state_space=self.state_space,
                    action_space=self.action_space,
                    tech_indicator_list=self.tech_indicator_list,
                    turbulence_threshold=turbulence_threshold,
                    iteration=i,
                    model_name="SAC",
                    mode="validation",
                    print_verbosity=self.print_verbosity,
                )
            ]
        )
        val_obs_sac = val_env_sac.reset()
        self.DRL_validation(
            model=model_sac,
            test_data=validation,
            test_env=val_env_sac,
            test_obs=val_obs_sac,
        )
        sharpe_sac = self.get_validation_sharpe(i, model_name="SAC")
        print("SAC Sharpe Ratio: ", sharpe_sac)

print("======TD3 Training========")

model_td3 = self.get_model(
"td3", self.train_env, policy="MlpPolicy", model_kwargs=TD3_model_kwargs
)
model_td3 = self.train_model(
model_td3,
"td3",
tb_log_name=f"td3_{i}",
iter_num=i,
total_timesteps=timesteps_dict["td3"],
) # 100_000

        print(
            "======TD3 Validation from: ",
            validation_start_date,
            "to ",
            validation_end_date,
        )
        val_env_td3 = DummyVecEnv(
            [
                lambda: StockTradingEnv(
                    df=validation,
                    stock_dim=self.stock_dim,
                    hmax=self.hmax,
                    initial_amount=self.initial_amount,
                    num_stock_shares=[0] * self.stock_dim,
                    buy_cost_pct=[self.buy_cost_pct] * self.stock_dim,
                    sell_cost_pct=[self.sell_cost_pct] * self.stock_dim,
                    reward_scaling=self.reward_scaling,
                    state_space=self.state_space,
                    action_space=self.action_space,
                    tech_indicator_list=self.tech_indicator_list,
                    turbulence_threshold=turbulence_threshold,
                    iteration=i,
                    model_name="TD3”,
                    mode="validation",
                    print_verbosity=self.print_verbosity,
                )
            ]
        )
        val_obs_td3 = val_env_td3.reset()
        self.DRL_validation(
            model=model_td3,
            test_data=validation,
            test_env=val_env_td3,
            test_obs=val_obs_td3,
        )
        sharpe_td3 = self.get_validation_sharpe(i, model_name="TD3")
        print("TD3 Sharpe Ratio: ", sharpe_td3)


        print("======DDPG Training========")
        model_ddpg = self.get_model(
            "ddpg",
            self.train_env,
            policy="MlpPolicy",
            model_kwargs=DDPG_model_kwargs,
        )
        model_ddpg = self.train_model(
            model_ddpg,
            "ddpg",
            tb_log_name=f"ddpg_{i}",
            iter_num=i,
            total_timesteps=timesteps_dict["ddpg"],
        )  # 50_000
        print(
            "======DDPG Validation from: ",
            validation_start_date,
            "to ",
            validation_end_date,
        )
        val_env_ddpg = DummyVecEnv(
            [
                lambda: StockTradingEnv(
                    df=validation,
                    stock_dim=self.stock_dim,
                    hmax=self.hmax,
                    initial_amount=self.initial_amount,
                    num_stock_shares=[0] * self.stock_dim,
                    buy_cost_pct=[self.buy_cost_pct] * self.stock_dim,
                    sell_cost_pct=[self.sell_cost_pct] * self.stock_dim,
                    reward_scaling=self.reward_scaling,
                    state_space=self.state_space,
                    action_space=self.action_space,
                    tech_indicator_list=self.tech_indicator_list,
                    turbulence_threshold=turbulence_threshold,
                    iteration=i,
                    model_name="DDPG",
                    mode="validation",
                    print_verbosity=self.print_verbosity,
                )
            ]
        )
        val_obs_ddpg = val_env_ddpg.reset()
        self.DRL_validation(
            model=model_ddpg,
            test_data=validation,
            test_env=val_env_ddpg,
            test_obs=val_obs_ddpg,
        )
        sharpe_ddpg = self.get_validation_sharpe(i, model_name="DDPG")

        ppo_sharpe_list.append(sharpe_ppo)
        a2c_sharpe_list.append(sharpe_a2c)
        sac_sharpe_list.append(sharpe_sac)
        td3_sharpe_list.append(sharpe_td3)
        ddpg_sharpe_list.append(sharpe_ddpg)

        print(
            "======Best Model Retraining from: ",
            self.train_period[0],
            "to ",
            self.unique_trade_date[i - self.rebalance_window],
        )
        # Environment setup for model retraining up to first trade date
        # train_full = data_split(self.df, start=self.train_period[0], end=self.unique_trade_date[i - self.rebalance_window])
        # self.train_full_env = DummyVecEnv([lambda: StockTradingEnv(train_full,
        #                                                    self.stock_dim,
        #                                                    self.hmax,
        #                                                    self.initial_amount,
        #                                                    self.buy_cost_pct,
        #                                                    self.sell_cost_pct,
        #                                                    self.reward_scaling,
        #                                                    self.state_space,
        #                                                    self.action_space,
        #                                                    self.tech_indicator_list,
        #                                                    print_verbosity=self.print_verbosity)])
        # Model Selection based on sharpe ratio
        if (sharpe_ppo >= sharpe_a2c) & (sharpe_ppo >= sharpe_ddpg) & (sharpe_ppo >= sharpe_sac) & (sharpe_ppo >= sharpe_td3):
            model_use.append("PPO")
            model_ensemble = model_ppo

            # model_ensemble = self.get_model("ppo",self.train_full_env,policy="MlpPolicy",model_kwargs=PPO_model_kwargs)
            # model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['ppo']) #100_000
        elif (sharpe_a2c > sharpe_ppo) & (sharpe_a2c > sharpe_ddpg) & (sharpe_a2c >= sharpe_sac) & (sharpe_a2c >= sharpe_td3):
            model_use.append("A2C")
            model_ensemble = model_a2c

            # model_ensemble = self.get_model("a2c",self.train_full_env,policy="MlpPolicy",model_kwargs=A2C_model_kwargs)
            # model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['a2c']) #100_000
        elif (sharpe_td3 > sharpe_ppo) & (sharpe_td3 > sharpe_ddpg) & (sharpe_td3 >= sharpe_sac) & (sharpe_td3 >= sharpe_a2c):
            model_use.append("TD3")
            model_ensemble = model_td3

            # model_ensemble = self.get_model("td3",self.train_full_env,policy="MlpPolicy",model_kwargs=TD3_model_kwargs)
            # model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['td3']) #100_000
        elif (sharpe_sac > sharpe_ppo) & (sharpe_sac > sharpe_ddpg) & (sharpe_sac >= sharpe_td3) & (sharpe_sac >= sharpe_a2c):
            model_use.append("SAC")
            model_ensemble = model_sac

            # model_ensemble = self.get_model("sac",self.train_full_env,policy="MlpPolicy",model_kwargs=SAC_model_kwargs)
            # model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['sac']) #100_000
    
        else:
            model_use.append("DDPG")
            model_ensemble = model_ddpg

            # model_ensemble = self.get_model("ddpg",self.train_full_env,policy="MlpPolicy",model_kwargs=DDPG_model_kwargs)
            # model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['ddpg']) #50_000

        ############## Training and Validation ends ##############

        ############## Trading starts ##############
        print(
            "======Trading from: ",
            self.unique_trade_date[i - self.rebalance_window],
            "to ",
            self.unique_trade_date[i],
        )
        # print("Used Model: ", model_ensemble)
        last_state_ensemble = self.DRL_prediction(
            model=model_ensemble,
            name="ensemble",
            last_state=last_state_ensemble,
            iter_num=i,
            turbulence_threshold=turbulence_threshold,
            initial=initial,
        )
        ############## Trading ends ##############

    end = time.time()
    print("Ensemble Strategy took: ", (end - start) / 60, " minutes")

    df_summary = pd.DataFrame(
        [
            iteration_list,
            validation_start_date_list,
            validation_end_date_list,
            model_use,
            a2c_sharpe_list,
            ppo_sharpe_list,
            ddpg_sharpe_list,
            sac_sharpe_list,
            td3_sharpe_list,
        ]
    ).T
    df_summary.columns = [
        "Iter",
        "Val Start",
        "Val End",
        "Model Used",
        "A2C Sharpe",
        "PPO Sharpe",
        "DDPG Sharpe",
        "SAC Sharpe",
        "TD3 Sharpe",
    ]

    return df_summary

```

[Attached screenshot: Screen Shot 2022-08-11 at 12 26 11 AM]
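
For context, a minimal sketch of how the extended ensemble class is invoked. Everything below is illustrative: `processed_df`, `stock_dimension`, `state_space`, `tech_indicator_list`, the date ranges and all hyper-parameter values are placeholders, not the settings used above.

```python
# Illustrative only: all variable names and hyper-parameter values are placeholders.
ensemble_agent = DRLEnsembleAgent(
    df=processed_df,  # preprocessed DataFrame with `date` and `turbulence` columns
    train_period=("2010-01-01", "2020-07-01"),
    val_test_period=("2020-07-01", "2021-10-29"),
    rebalance_window=63,
    validation_window=63,
    stock_dim=stock_dimension,
    hmax=100,
    initial_amount=1_000_000,
    buy_cost_pct=0.001,
    sell_cost_pct=0.001,
    reward_scaling=1e-4,
    state_space=state_space,
    action_space=stock_dimension,
    tech_indicator_list=tech_indicator_list,
    print_verbosity=5,
)

df_summary = ensemble_agent.run_ensemble_strategy(
    A2C_model_kwargs={"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0007},
    PPO_model_kwargs={"ent_coef": 0.01, "n_steps": 2048, "learning_rate": 0.00025, "batch_size": 128},
    DDPG_model_kwargs={"buffer_size": 10_000, "learning_rate": 0.0005, "batch_size": 64},
    SAC_model_kwargs={"buffer_size": 100_000, "learning_rate": 0.0001, "batch_size": 64},
    TD3_model_kwargs={"buffer_size": 10_000, "learning_rate": 0.0001, "batch_size": 100},
    timesteps_dict={"a2c": 10_000, "ppo": 10_000, "ddpg": 10_000, "sac": 10_000, "td3": 10_000},
)
```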
