diff --git a/fundamental_back_testing.ipynb b/fundamental_back_testing.ipynb index c7e531fc3..72cc597f6 100644 --- a/fundamental_back_testing.ipynb +++ b/fundamental_back_testing.ipynb @@ -2,8 +2,10 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, - "metadata": {}, + "execution_count": 14, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import numpy as np\n", @@ -39,19 +41,19 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ - "data_adj = pd.read_csv(\"final_ratios.csv\",index_col=0)\n", - "weight_meanv = pd.read_excel('mean_weighted.xlsx',index_col=0)\n", - "weight_minv = pd.read_excel('minimum_weighted.xlsx',index_col=0)\n", - "weight_equal=pd.read_excel('equally_weighted.xlsx',index_col=0)" + "data_adj = pd.read_csv(\"final_ratios_oct.csv\")\n", + "weight_meanv = pd.read_excel('mean_weighted_rf.xlsx')\n", + "weight_minv = pd.read_excel('minimum_weighted_rf.xlsx')\n", + "weight_equal=pd.read_excel('equally_weighted_rf.xlsx')" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -75,6 +77,7 @@ " \n", " \n", " \n", + " Unnamed: 0\n", " date\n", " gvkey\n", " tic\n", @@ -82,7 +85,6 @@ " adj_close_q\n", " y_return\n", " EPS\n", - " BPS\n", " DPS\n", " cur_ratio\n", " quick_ratio\n", @@ -98,148 +100,295 @@ " \n", " \n", " 0\n", - " 1996-06-01\n", - " 1045\n", - " AAL\n", - " 20.0\n", - " 44.7500\n", - " 0.016621\n", - " 2.02\n", - " 49.584416\n", - " 0.0\n", - " 0.717784\n", - " 0.475086\n", - " 0.179124\n", + " 66261\n", + " 1998-03-01\n", + " 126554\n", + " A\n", + " 35.0\n", + " 0.00\n", + " 0.000000\n", + " 0.42\n", + " 0.000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", " 0.000000\n", - " 0.800010\n", - " 4.000257\n", - " 44.306931\n", - " 1.599698\n", - " 1.768866\n", " \n", " \n", " 1\n", - " 1996-09-01\n", - " 1045\n", - " AAL\n", - " 20.0\n", - " 45.5000\n", - " -0.133531\n", - " 5.44\n", - " 55.186813\n", - " 0.0\n", - " 0.738051\n", - " 0.497600\n", - " 0.185348\n", + " 66262\n", + " 1998-06-01\n", + " 126554\n", + " A\n", + " 35.0\n", + " 0.00\n", + " 0.000000\n", + " 0.67\n", + " 0.000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", " 0.000000\n", - " 0.744583\n", - " 2.915173\n", - " 27.164179\n", - " 1.820000\n", - " 1.648945\n", " \n", " \n", " 2\n", - " 1996-12-01\n", - " 1045\n", - " AAL\n", - " 20.0\n", - " 39.8125\n", - " 0.101428\n", - " 8.53\n", - " 58.164835\n", - " 0.0\n", - " 0.789316\n", - " 0.557623\n", - " 0.269708\n", + " 66263\n", + " 1998-09-01\n", + " 126554\n", + " A\n", + " 35.0\n", + " 0.00\n", + " 0.000000\n", + " 0.81\n", + " 0.000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", " 0.000000\n", - " 0.737307\n", - " 2.806726\n", - " 26.021242\n", - " 1.588311\n", - " 1.368954\n", " \n", " \n", " 3\n", - " 1997-03-01\n", - " 1045\n", - " AAL\n", - " 20.0\n", - " 44.0625\n", - " -0.065958\n", - " 11.63\n", - " 62.285714\n", - " 0.0\n", - " 0.803090\n", - " 0.573662\n", - " 0.325368\n", - " 9.710564\n", - " 0.723472\n", - " 2.616267\n", - " 21.759259\n", - " 1.850767\n", - " 1.414851\n", + " 66264\n", + " 1998-12-01\n", + " 126554\n", + " A\n", + " 35.0\n", + " 0.00\n", + " 0.000000\n", + " 0.68\n", + " 0.000\n", + " 1.923077\n", + " 0.759850\n", + " 0.000000\n", + " 4.909465\n", + " 0.394024\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", " \n", " \n", " 4\n", - " 1997-06-01\n", - " 1045\n", - " AAL\n", - " 20.0\n", - " 41.2500\n", - " 0.114410\n", - " 1.67\n", - " 64.000000\n", - " 0.0\n", - " 0.851293\n", - " 0.611247\n", - " 0.329496\n", - " 9.005358\n", - " 0.713428\n", - " 2.489526\n", - " 49.401198\n", - " 1.696227\n", - " 1.289062\n", + " 66265\n", + " 1999-03-01\n", + " 126554\n", + " A\n", + " 35.0\n", + " 0.00\n", + " 0.000000\n", + " 0.19\n", + " 0.000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 77580\n", + " 41126\n", + " 2021-09-01\n", + " 13721\n", + " ZTS\n", + " 35.0\n", + " 186.36\n", + " 0.040899\n", + " 2.25\n", + " 0.250\n", + " 3.370636\n", + " 2.308943\n", + " 1.749402\n", + " 4.676923\n", + " 0.690667\n", + " 2.232767\n", + " 172.555556\n", + " 45.319980\n", + " 20.306517\n", + " \n", + " \n", + " 77581\n", + " 41127\n", + " 2021-12-01\n", + " 13721\n", + " ZTS\n", + " 35.0\n", + " 194.14\n", + " 0.228712\n", + " 3.42\n", + " 0.250\n", + " 4.542488\n", + " 3.008838\n", + " 2.225697\n", + " 4.889757\n", + " 0.658446\n", + " 1.927793\n", + " 167.362069\n", + " 46.111805\n", + " 19.632734\n", + " \n", + " \n", + " 77582\n", + " 41128\n", + " 2022-03-01\n", + " 13721\n", + " ZTS\n", + " 35.0\n", + " 244.03\n", + " -0.257716\n", + " 4.29\n", + " 0.250\n", + " 3.856427\n", + " 2.569839\n", + " 1.939343\n", + " 5.133274\n", + " 0.673094\n", + " 2.058979\n", + " 277.306818\n", + " 58.539205\n", + " 25.379013\n", + " \n", + " \n", + " 77583\n", + " 41129\n", + " 2022-06-01\n", + " 13721\n", + " ZTS\n", + " 35.0\n", + " 188.59\n", + " -0.092721\n", + " 1.26\n", + " 0.325\n", + " 2.253461\n", + " 1.436058\n", + " 1.033289\n", + " 4.838789\n", + " 0.663925\n", + " 1.975526\n", + " 149.674603\n", + " 44.723354\n", + " 19.077996\n", + " \n", + " \n", + " 77584\n", + " 41130\n", + " 2022-09-01\n", + " 13721\n", + " ZTS\n", + " 35.0\n", + " 171.89\n", + " 0.000000\n", + " 2.39\n", + " 0.325\n", + " 2.179941\n", + " 1.292363\n", + " 0.869223\n", + " 4.608830\n", + " 0.667393\n", + " 2.006550\n", + " 152.115044\n", + " 39.235211\n", + " 17.587312\n", " \n", " \n", "\n", + "

77585 rows × 18 columns

\n", "" ], "text/plain": [ - " date gvkey tic gsector adj_close_q y_return EPS BPS \\\n", - "0 1996-06-01 1045 AAL 20.0 44.7500 0.016621 2.02 49.584416 \n", - "1 1996-09-01 1045 AAL 20.0 45.5000 -0.133531 5.44 55.186813 \n", - "2 1996-12-01 1045 AAL 20.0 39.8125 0.101428 8.53 58.164835 \n", - "3 1997-03-01 1045 AAL 20.0 44.0625 -0.065958 11.63 62.285714 \n", - "4 1997-06-01 1045 AAL 20.0 41.2500 0.114410 1.67 64.000000 \n", + " Unnamed: 0 date gvkey tic gsector adj_close_q y_return \\\n", + "0 66261 1998-03-01 126554 A 35.0 0.00 0.000000 \n", + "1 66262 1998-06-01 126554 A 35.0 0.00 0.000000 \n", + "2 66263 1998-09-01 126554 A 35.0 0.00 0.000000 \n", + "3 66264 1998-12-01 126554 A 35.0 0.00 0.000000 \n", + "4 66265 1999-03-01 126554 A 35.0 0.00 0.000000 \n", + "... ... ... ... ... ... ... ... \n", + "77580 41126 2021-09-01 13721 ZTS 35.0 186.36 0.040899 \n", + "77581 41127 2021-12-01 13721 ZTS 35.0 194.14 0.228712 \n", + "77582 41128 2022-03-01 13721 ZTS 35.0 244.03 -0.257716 \n", + "77583 41129 2022-06-01 13721 ZTS 35.0 188.59 -0.092721 \n", + "77584 41130 2022-09-01 13721 ZTS 35.0 171.89 0.000000 \n", "\n", - " DPS cur_ratio quick_ratio cash_ratio acc_rec_turnover debt_ratio \\\n", - "0 0.0 0.717784 0.475086 0.179124 0.000000 0.800010 \n", - "1 0.0 0.738051 0.497600 0.185348 0.000000 0.744583 \n", - "2 0.0 0.789316 0.557623 0.269708 0.000000 0.737307 \n", - "3 0.0 0.803090 0.573662 0.325368 9.710564 0.723472 \n", - "4 0.0 0.851293 0.611247 0.329496 9.005358 0.713428 \n", + " EPS DPS cur_ratio quick_ratio cash_ratio acc_rec_turnover \\\n", + "0 0.42 0.000 0.000000 0.000000 0.000000 0.000000 \n", + "1 0.67 0.000 0.000000 0.000000 0.000000 0.000000 \n", + "2 0.81 0.000 0.000000 0.000000 0.000000 0.000000 \n", + "3 0.68 0.000 1.923077 0.759850 0.000000 4.909465 \n", + "4 0.19 0.000 0.000000 0.000000 0.000000 0.000000 \n", + "... ... ... ... ... ... ... \n", + "77580 2.25 0.250 3.370636 2.308943 1.749402 4.676923 \n", + "77581 3.42 0.250 4.542488 3.008838 2.225697 4.889757 \n", + "77582 4.29 0.250 3.856427 2.569839 1.939343 5.133274 \n", + "77583 1.26 0.325 2.253461 1.436058 1.033289 4.838789 \n", + "77584 2.39 0.325 2.179941 1.292363 0.869223 4.608830 \n", "\n", - " debt_to_equity pe ps pb \n", - "0 4.000257 44.306931 1.599698 1.768866 \n", - "1 2.915173 27.164179 1.820000 1.648945 \n", - "2 2.806726 26.021242 1.588311 1.368954 \n", - "3 2.616267 21.759259 1.850767 1.414851 \n", - "4 2.489526 49.401198 1.696227 1.289062 " + " debt_ratio debt_to_equity pe ps pb \n", + "0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "1 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "2 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "3 0.394024 0.000000 0.000000 0.000000 0.000000 \n", + "4 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "... ... ... ... ... ... \n", + "77580 0.690667 2.232767 172.555556 45.319980 20.306517 \n", + "77581 0.658446 1.927793 167.362069 46.111805 19.632734 \n", + "77582 0.673094 2.058979 277.306818 58.539205 25.379013 \n", + "77583 0.663925 1.975526 149.674603 44.723354 19.077996 \n", + "77584 0.667393 2.006550 152.115044 39.235211 17.587312 \n", + "\n", + "[77585 rows x 18 columns]" ] }, - "execution_count": 3, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "data_adj.head()" + "data_adj" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -263,6 +412,7 @@ " \n", " \n", " \n", + " Unnamed: 0\n", " trade_date\n", " gvkey\n", " weights\n", @@ -272,53 +422,58 @@ " \n", " \n", " 0\n", - " 2018-03-01\n", - " 1230\n", - " 0.00590\n", - " 0.026920\n", + " 0\n", + " 2001-03-01\n", + " 1078\n", + " 0.00000\n", + " 0.044809\n", " \n", " \n", " 1\n", - " 2018-03-01\n", - " 1678\n", - " 0.00572\n", - " 0.013570\n", + " 1\n", + " 2001-03-01\n", + " 1230\n", + " 0.00000\n", + " 0.019396\n", " \n", " \n", " 2\n", - " 2018-03-01\n", - " 1722\n", - " 0.00572\n", - " 0.035988\n", + " 2\n", + " 2001-03-01\n", + " 1246\n", + " 0.01237\n", + " 0.014087\n", " \n", " \n", " 3\n", - " 2018-03-01\n", - " 2574\n", - " 0.00549\n", - " 0.035439\n", + " 3\n", + " 2001-03-01\n", + " 1327\n", + " 0.01888\n", + " 0.128518\n", " \n", " \n", " 4\n", - " 2018-03-01\n", - " 2751\n", - " 0.00572\n", - " 0.028170\n", + " 4\n", + " 2001-03-01\n", + " 1602\n", + " 0.00000\n", + " 0.045424\n", " \n", " \n", "\n", "" ], "text/plain": [ - " trade_date gvkey weights predicted_return\n", - "0 2018-03-01 1230 0.00590 0.026920\n", - "1 2018-03-01 1678 0.00572 0.013570\n", - "2 2018-03-01 1722 0.00572 0.035988\n", - "3 2018-03-01 2574 0.00549 0.035439\n", - "4 2018-03-01 2751 0.00572 0.028170" + " Unnamed: 0 trade_date gvkey weights predicted_return\n", + "0 0 2001-03-01 1078 0.00000 0.044809\n", + "1 1 2001-03-01 1230 0.00000 0.019396\n", + "2 2 2001-03-01 1246 0.01237 0.014087\n", + "3 3 2001-03-01 1327 0.01888 0.128518\n", + "4 4 2001-03-01 1602 0.00000 0.045424" ] }, - "execution_count": 4, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -329,7 +484,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -353,6 +508,7 @@ " \n", " \n", " \n", + " Unnamed: 0\n", " trade_date\n", " gvkey\n", " weights\n", @@ -362,53 +518,58 @@ " \n", " \n", " 0\n", - " 2018-03-01\n", - " 1230\n", - " 0.00531\n", - " 0.026920\n", + " 0\n", + " 2001-03-01\n", + " 1078\n", + " 0.00662\n", + " 0.044809\n", " \n", " \n", " 1\n", - " 2018-03-01\n", - " 1678\n", - " 0.00531\n", - " 0.013570\n", + " 1\n", + " 2001-03-01\n", + " 1230\n", + " 0.00631\n", + " 0.019396\n", " \n", " \n", " 2\n", - " 2018-03-01\n", - " 1722\n", - " 0.00532\n", - " 0.035988\n", + " 2\n", + " 2001-03-01\n", + " 1246\n", + " 0.00629\n", + " 0.014087\n", " \n", " \n", " 3\n", - " 2018-03-01\n", - " 2574\n", - " 0.00531\n", - " 0.035439\n", + " 3\n", + " 2001-03-01\n", + " 1327\n", + " 0.00628\n", + " 0.128518\n", " \n", " \n", " 4\n", - " 2018-03-01\n", - " 2751\n", - " 0.00531\n", - " 0.028170\n", + " 4\n", + " 2001-03-01\n", + " 1602\n", + " 0.00628\n", + " 0.045424\n", " \n", " \n", "\n", "" ], "text/plain": [ - " trade_date gvkey weights predicted_return\n", - "0 2018-03-01 1230 0.00531 0.026920\n", - "1 2018-03-01 1678 0.00531 0.013570\n", - "2 2018-03-01 1722 0.00532 0.035988\n", - "3 2018-03-01 2574 0.00531 0.035439\n", - "4 2018-03-01 2751 0.00531 0.028170" + " Unnamed: 0 trade_date gvkey weights predicted_return\n", + "0 0 2001-03-01 1078 0.00662 0.044809\n", + "1 1 2001-03-01 1230 0.00631 0.019396\n", + "2 2 2001-03-01 1246 0.00629 0.014087\n", + "3 3 2001-03-01 1327 0.00628 0.128518\n", + "4 4 2001-03-01 1602 0.00628 0.045424" ] }, - "execution_count": 5, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -419,7 +580,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -443,6 +604,7 @@ " \n", " \n", " \n", + " Unnamed: 0\n", " trade_date\n", " gvkey\n", " weights\n", @@ -452,53 +614,58 @@ " \n", " \n", " 0\n", - " 2018-03-01\n", - " 1230\n", - " 0.005556\n", - " 0.026920\n", + " 0\n", + " 2001-03-01\n", + " 1078\n", + " 0.006579\n", + " 0.044809\n", " \n", " \n", " 1\n", - " 2018-03-01\n", - " 1678\n", - " 0.005556\n", - " 0.013570\n", + " 1\n", + " 2001-03-01\n", + " 1230\n", + " 0.006579\n", + " 0.019396\n", " \n", " \n", " 2\n", - " 2018-03-01\n", - " 1722\n", - " 0.005556\n", - " 0.035988\n", + " 2\n", + " 2001-03-01\n", + " 1246\n", + " 0.006579\n", + " 0.014087\n", " \n", " \n", " 3\n", - " 2018-03-01\n", - " 2574\n", - " 0.005556\n", - " 0.035439\n", + " 3\n", + " 2001-03-01\n", + " 1327\n", + " 0.006579\n", + " 0.128518\n", " \n", " \n", " 4\n", - " 2018-03-01\n", - " 2751\n", - " 0.005556\n", - " 0.028170\n", + " 4\n", + " 2001-03-01\n", + " 1602\n", + " 0.006579\n", + " 0.045424\n", " \n", " \n", "\n", "" ], "text/plain": [ - " trade_date gvkey weights predicted_return\n", - "0 2018-03-01 1230 0.005556 0.026920\n", - "1 2018-03-01 1678 0.005556 0.013570\n", - "2 2018-03-01 1722 0.005556 0.035988\n", - "3 2018-03-01 2574 0.005556 0.035439\n", - "4 2018-03-01 2751 0.005556 0.028170" + " Unnamed: 0 trade_date gvkey weights predicted_return\n", + "0 0 2001-03-01 1078 0.006579 0.044809\n", + "1 1 2001-03-01 1230 0.006579 0.019396\n", + "2 2 2001-03-01 1246 0.006579 0.014087\n", + "3 3 2001-03-01 1327 0.006579 0.128518\n", + "4 4 2001-03-01 1602 0.006579 0.045424" ] }, - "execution_count": 6, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -509,14 +676,14 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "3870 3870 3870\n" + "16319 16319 16319\n" ] } ], @@ -535,7 +702,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [] }, @@ -555,8 +724,10 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": {}, + "execution_count": 21, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "#get unique stock name from table\n", @@ -568,16 +739,16 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "644" + "858" ] }, - "execution_count": 11, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -588,7 +759,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -611,16 +782,16 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "107" + "108" ] }, - "execution_count": 13, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -631,148 +802,7 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trade_dategvkeyweightspredicted_return
02018-03-0112300.005900.026920
12018-03-0116780.005720.013570
22018-03-0117220.005720.035988
32018-03-0125740.005490.035439
42018-03-0127510.005720.028170
...............
38652022-09-011876970.001440.036295
38662022-09-012416370.000230.017479
38672022-09-012607740.000000.027837
38682022-09-012878820.00012-0.000764
38692022-09-013160560.049790.034261
\n", - "

3870 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " trade_date gvkey weights predicted_return\n", - "0 2018-03-01 1230 0.00590 0.026920\n", - "1 2018-03-01 1678 0.00572 0.013570\n", - "2 2018-03-01 1722 0.00572 0.035988\n", - "3 2018-03-01 2574 0.00549 0.035439\n", - "4 2018-03-01 2751 0.00572 0.028170\n", - "... ... ... ... ...\n", - "3865 2022-09-01 187697 0.00144 0.036295\n", - "3866 2022-09-01 241637 0.00023 0.017479\n", - "3867 2022-09-01 260774 0.00000 0.027837\n", - "3868 2022-09-01 287882 0.00012 -0.000764\n", - "3869 2022-09-01 316056 0.04979 0.034261\n", - "\n", - "[3870 rows x 4 columns]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "weight_meanv" - ] - }, - { - "cell_type": "code", - "execution_count": 15, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -783,44 +813,157 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "19" + "['2001-03-01',\n", + " '2001-06-01',\n", + " '2001-09-01',\n", + " '2001-12-01',\n", + " '2002-03-01',\n", + " '2002-06-01',\n", + " '2002-09-01',\n", + " '2002-12-01',\n", + " '2003-03-01',\n", + " '2003-06-01',\n", + " '2003-09-01',\n", + " '2003-12-01',\n", + " '2004-03-01',\n", + " '2004-06-01',\n", + " '2004-09-01',\n", + " '2004-12-01',\n", + " '2005-03-01',\n", + " '2005-06-01',\n", + " '2005-09-01',\n", + " '2005-12-01',\n", + " '2006-03-01',\n", + " '2006-06-01',\n", + " '2006-09-01',\n", + " '2006-12-01',\n", + " '2007-03-01',\n", + " '2007-06-01',\n", + " '2007-09-01',\n", + " '2007-12-01',\n", + " '2008-03-01',\n", + " '2008-06-01',\n", + " '2008-09-01',\n", + " '2008-12-01',\n", + " '2009-03-01',\n", + " '2009-06-01',\n", + " '2009-09-01',\n", + " '2009-12-01',\n", + " '2010-03-01',\n", + " '2010-06-01',\n", + " '2010-09-01',\n", + " '2010-12-01',\n", + " '2011-03-01',\n", + " '2011-06-01',\n", + " '2011-09-01',\n", + " '2011-12-01',\n", + " '2012-03-01',\n", + " '2012-06-01',\n", + " '2012-09-01',\n", + " '2012-12-01',\n", + " '2013-03-01',\n", + " '2013-06-01',\n", + " '2013-09-01',\n", + " '2013-12-01',\n", + " '2014-03-01',\n", + " '2014-06-01',\n", + " '2014-09-01',\n", + " '2014-12-01',\n", + " '2015-03-01',\n", + " '2015-06-01',\n", + " '2015-09-01',\n", + " '2015-12-01',\n", + " '2016-03-01',\n", + " '2016-06-01',\n", + " '2016-09-01',\n", + " '2016-12-01',\n", + " '2017-03-01',\n", + " '2017-06-01',\n", + " '2017-09-01',\n", + " '2017-12-01',\n", + " '2018-03-01',\n", + " '2018-06-01',\n", + " '2018-09-01',\n", + " '2018-12-01',\n", + " '2019-03-01',\n", + " '2019-06-01',\n", + " '2019-09-01',\n", + " '2019-12-01',\n", + " '2020-03-01',\n", + " '2020-06-01',\n", + " '2020-09-01',\n", + " '2020-12-01',\n", + " '2021-03-01',\n", + " '2021-06-01',\n", + " '2021-09-01',\n", + " '2021-12-01',\n", + " '2022-03-01',\n", + " '2022-06-01',\n", + " '2022-09-01',\n", + " '2022-12-01']" ] }, - "execution_count": 16, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "len(tradedate)" + "tradedate" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "tradedate_full = [x for x in tradedate if x > '2018-01-01']" + ] + }, + { + "cell_type": "code", + "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "19" + "['2018-03-01',\n", + " '2018-06-01',\n", + " '2018-09-01',\n", + " '2018-12-01',\n", + " '2019-03-01',\n", + " '2019-06-01',\n", + " '2019-09-01',\n", + " '2019-12-01',\n", + " '2020-03-01',\n", + " '2020-06-01',\n", + " '2020-09-01',\n", + " '2020-12-01',\n", + " '2021-03-01',\n", + " '2021-06-01',\n", + " '2021-09-01',\n", + " '2021-12-01',\n", + " '2022-03-01',\n", + " '2022-06-01',\n", + " '2022-09-01',\n", + " '2022-12-01']" ] }, - "execution_count": 17, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "#add the last trade date 20170901\n", - "tradedate_full = tradedate.copy()\n", - "len(tradedate_full)" + "tradedate_full" ] }, { @@ -832,14 +975,16 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": {}, + "execution_count": 29, + "metadata": { + "collapsed": true + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "23.0654399394989\n" + "14.190996885299683\n" ] } ], @@ -876,8 +1021,10 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 30, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "balance_daily = pd.read_excel('balance_daily_user8.xlsx', index_col=0)\n" @@ -885,7 +1032,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -920,7 +1067,6 @@ " 1998-03-01\n", " 1998-06-01\n", " ...\n", - " 2020-06-01\n", " 2020-09-01\n", " 2020-12-01\n", " 2021-03-01\n", @@ -930,6 +1076,7 @@ " 2022-03-01\n", " 2022-06-01\n", " 2022-09-01\n", + " 2022-12-01\n", " \n", " \n", " \n", @@ -946,7 +1093,6 @@ " 64.2500\n", " 71.59350\n", " ...\n", - " 12.19\n", " 13.07\n", " 12.29\n", " 15.77\n", @@ -956,6 +1102,7 @@ " 17.96\n", " 18.25\n", " 12.68\n", + " 12.04\n", " \n", " \n", " 1075\n", @@ -970,7 +1117,6 @@ " 42.3750\n", " 44.43700\n", " ...\n", - " 75.79\n", " 73.29\n", " 74.55\n", " 79.95\n", @@ -980,6 +1126,7 @@ " 70.59\n", " 78.10\n", " 73.12\n", + " NaN\n", " \n", " \n", " 1078\n", @@ -994,7 +1141,6 @@ " 32.7500\n", " 37.65600\n", " ...\n", - " 78.91\n", " 91.43\n", " 108.83\n", " 109.49\n", @@ -1004,6 +1150,7 @@ " 140.74\n", " 118.36\n", " 108.65\n", + " 96.76\n", " \n", " \n", " 1161\n", @@ -1018,7 +1165,6 @@ " 8.8750\n", " 14.53100\n", " ...\n", - " 45.48\n", " 52.61\n", " 81.99\n", " 91.71\n", @@ -1028,6 +1174,7 @@ " 143.90\n", " 109.34\n", " 76.47\n", + " NaN\n", " \n", " \n", " 1177\n", @@ -1090,7 +1237,6 @@ " NaN\n", " NaN\n", " ...\n", - " 1.42\n", " 1.72\n", " 1.51\n", " 2.05\n", @@ -1100,6 +1246,7 @@ " 3.18\n", " NaN\n", " NaN\n", + " NaN\n", " \n", " \n", " 287882\n", @@ -1114,7 +1261,6 @@ " NaN\n", " NaN\n", " ...\n", - " 9.51\n", " 11.13\n", " 9.84\n", " 12.91\n", @@ -1124,6 +1270,7 @@ " 12.89\n", " 18.60\n", " 10.86\n", + " NaN\n", " \n", " \n", " 294524\n", @@ -1138,7 +1285,6 @@ " NaN\n", " NaN\n", " ...\n", - " 49.63\n", " 65.72\n", " 70.49\n", " 91.66\n", @@ -1148,6 +1294,7 @@ " 92.23\n", " 102.82\n", " 87.46\n", + " NaN\n", " \n", " \n", " 312009\n", @@ -1162,7 +1309,6 @@ " NaN\n", " NaN\n", " ...\n", - " 9.61\n", " 6.98\n", " 6.50\n", " 6.22\n", @@ -1172,6 +1318,7 @@ " 7.07\n", " 8.80\n", " 8.86\n", + " NaN\n", " \n", " \n", " 316056\n", @@ -1186,7 +1333,6 @@ " NaN\n", " NaN\n", " ...\n", - " 92.02\n", " 102.22\n", " 98.91\n", " 116.38\n", @@ -1196,10 +1342,11 @@ " 132.44\n", " 109.78\n", " 97.90\n", + " NaN\n", " \n", " \n", "\n", - "

644 rows × 107 columns

\n", + "

858 rows × 108 columns

\n", "" ], "text/plain": [ @@ -1229,36 +1376,36 @@ "312009 NaN NaN NaN NaN NaN ... \n", "316056 NaN NaN NaN NaN NaN ... \n", "\n", - " 2020-06-01 2020-09-01 2020-12-01 2021-03-01 2021-06-01 \\\n", - "1045 12.19 13.07 12.29 15.77 23.90 \n", - "1075 75.79 73.29 74.55 79.95 81.35 \n", - "1078 78.91 91.43 108.83 109.49 119.84 \n", - "1161 45.48 52.61 81.99 91.71 78.50 \n", + " 2020-09-01 2020-12-01 2021-03-01 2021-06-01 2021-09-01 \\\n", + "1045 13.07 12.29 15.77 23.90 21.21 \n", + "1075 73.29 74.55 79.95 81.35 81.97 \n", + "1078 91.43 108.83 109.49 119.84 115.93 \n", + "1161 52.61 81.99 91.71 78.50 93.93 \n", "1177 NaN NaN NaN NaN NaN \n", "... ... ... ... ... ... \n", - "270281 1.42 1.72 1.51 2.05 1.73 \n", - "287882 9.51 11.13 9.84 12.91 12.82 \n", - "294524 49.63 65.72 70.49 91.66 104.05 \n", - "312009 9.61 6.98 6.50 6.22 7.15 \n", - "316056 92.02 102.22 98.91 116.38 125.62 \n", + "270281 1.72 1.51 2.05 1.73 2.03 \n", + "287882 11.13 9.84 12.91 12.82 14.62 \n", + "294524 65.72 70.49 91.66 104.05 102.87 \n", + "312009 6.98 6.50 6.22 7.15 9.00 \n", + "316056 102.22 98.91 116.38 125.62 139.30 \n", "\n", - " 2021-09-01 2021-12-01 2022-03-01 2022-06-01 2022-09-01 \n", - "1045 21.21 20.52 17.96 18.25 12.68 \n", - "1075 81.97 72.36 70.59 78.10 73.12 \n", - "1078 115.93 118.13 140.74 118.36 108.65 \n", - "1161 93.93 102.90 143.90 109.34 76.47 \n", + " 2021-12-01 2022-03-01 2022-06-01 2022-09-01 2022-12-01 \n", + "1045 20.52 17.96 18.25 12.68 12.04 \n", + "1075 72.36 70.59 78.10 73.12 NaN \n", + "1078 118.13 140.74 118.36 108.65 96.76 \n", + "1161 102.90 143.90 109.34 76.47 NaN \n", "1177 NaN NaN NaN NaN NaN \n", "... ... ... ... ... ... \n", - "270281 2.03 4.06 3.18 NaN NaN \n", - "287882 14.62 14.35 12.89 18.60 10.86 \n", - "294524 102.87 93.85 92.23 102.82 87.46 \n", - "312009 9.00 9.37 7.07 8.80 8.86 \n", - "316056 139.30 132.18 132.44 109.78 97.90 \n", + "270281 4.06 3.18 NaN NaN NaN \n", + "287882 14.35 12.89 18.60 10.86 NaN \n", + "294524 93.85 92.23 102.82 87.46 NaN \n", + "312009 9.37 7.07 8.80 8.86 NaN \n", + "316056 132.18 132.44 109.78 97.90 NaN \n", "\n", - "[644 rows x 107 columns]" + "[858 rows x 108 columns]" ] }, - "execution_count": 19, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1289,8 +1436,10 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, + "execution_count": 32, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# set initial capital to 1 million and transaction percet to 0.1%\n", @@ -1301,16 +1450,16 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(3870, 4)" + "(16319, 5)" ] }, - "execution_count": 21, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1321,8 +1470,10 @@ }, { "cell_type": "code", - "execution_count": 22, - "metadata": {}, + "execution_count": 38, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "#construct an empty matrix\n", @@ -1365,6 +1516,7 @@ " balance_price_full = []\n", " balance_price_full = balance_daily[tradedate_full]\n", " balance_price = balance_daily[tradedate]\n", + " balance_price.replace([0, np.inf, -np.inf], np.nan, inplace=True)\n", " for j in range(len(tradedate) - 1):\n", " i = tradedate[j]\n", " #current capital\n", @@ -1436,9 +1588,21 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 39, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_3642/1845856610.py:41: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " balance_price.replace([0, np.inf, -np.inf], np.nan, inplace=True)\n" + ] + } + ], "source": [ "#calculate mean variance allocation portolio\n", "(balance_share_meanv,\n", @@ -1447,21 +1611,21 @@ " portfolio_meanv,\n", " portfolio_cost_meanv,\n", " portfolio_return_meanv,\n", - " portfolio_cumsum_meanv)=cal_portfolio(stocks_name,tradedate, weight_meanv, capital, transaction_percent)\n" + " portfolio_cumsum_meanv)=cal_portfolio(stocks_name,tradedate_full, weight_meanv, capital, transaction_percent)\n" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.2618634791131256" + "-0.6256100382905669" ] }, - "execution_count": 24, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -1472,9 +1636,21 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 41, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_3642/1845856610.py:41: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " balance_price.replace([0, np.inf, -np.inf], np.nan, inplace=True)\n" + ] + } + ], "source": [ "#calculate minimum variance allocation portolio\n", "(balance_share_minv,\n", @@ -1483,14 +1659,26 @@ " portfolio_minv,\n", " portfolio_cost_minv,\n", " portfolio_return_minv,\n", - " portfolio_cumsum_minv)=cal_portfolio(stocks_name,tradedate, weight_minv, capital, transaction_percent)" + " portfolio_cumsum_minv)=cal_portfolio(stocks_name,tradedate_full, weight_minv, capital, transaction_percent)" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 42, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_3642/1845856610.py:41: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " balance_price.replace([0, np.inf, -np.inf], np.nan, inplace=True)\n" + ] + } + ], "source": [ "#calculate equally weighted allocation portolio\n", "(balance_share_equal,\n", @@ -1499,21 +1687,21 @@ " portfolio_equal,\n", " portfolio_cost_equal,\n", " portfolio_return_equal,\n", - " portfolio_cumsum_equal)=cal_portfolio(stocks_name,tradedate, weight_equal, capital, transaction_percent)" + " portfolio_cumsum_equal)=cal_portfolio(stocks_name,tradedate_full, weight_equal, capital, transaction_percent)" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.3362423821397683" + "-0.6039422883904094" ] }, - "execution_count": 27, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -1522,44 +1710,6 @@ "sum(portfolio_return_equal)" ] }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['2018-03-01',\n", - " '2018-06-01',\n", - " '2018-09-01',\n", - " '2018-12-01',\n", - " '2019-03-01',\n", - " '2019-06-01',\n", - " '2019-09-01',\n", - " '2019-12-01',\n", - " '2020-03-01',\n", - " '2020-06-01',\n", - " '2020-09-01',\n", - " '2020-12-01',\n", - " '2021-03-01',\n", - " '2021-06-01',\n", - " '2021-09-01',\n", - " '2021-12-01',\n", - " '2022-03-01',\n", - " '2022-06-01',\n", - " '2022-09-01']" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tradedate" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -1569,7 +1719,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 54, "metadata": {}, "outputs": [], "source": [ @@ -1580,7 +1730,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ @@ -1589,7 +1739,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ @@ -1598,7 +1748,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ @@ -1607,7 +1757,53 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2677.669922\n", + "1 2734.620117\n", + "2 2901.520020\n", + "3 2760.169922\n", + "4 2803.689941\n", + "5 2752.060059\n", + "6 2926.459961\n", + "7 3140.979980\n", + "8 2954.219971\n", + "9 3055.729980\n", + "10 3526.649902\n", + "11 3662.449951\n", + "12 3901.820068\n", + "13 4202.040039\n", + "14 4524.089844\n", + "15 4513.040039\n", + "16 4306.259766\n", + "Name: close, dtype: float64" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spx" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "tradedate = tradedate[:-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 59, "metadata": {}, "outputs": [], "source": [ @@ -1642,16 +1838,16 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.3589495840431508" + "0.8455213218848242" ] }, - "execution_count": 34, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } @@ -1669,7 +1865,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1679,7 +1875,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtime_ind\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m/home/wenbiaolin/ML4StockRec_using_gvkey/fundamental_back_testing.ipynb Cell 42\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m time_ind\u001b[39m.\u001b[39mhead()\n", "\u001b[0;31mNameError\u001b[0m: name 'time_ind' is not defined" ] } @@ -1697,100 +1893,18 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 65, "metadata": {}, "outputs": [], "source": [ - "time_ind = tradedate" + "time_ind = tradedate_full[:-1]" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 66, "metadata": {}, "outputs": [ - { - "data": { - "text/html": [ - " \n", - " " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "application/vnd.plotly.v1+json": { @@ -1826,25 +1940,26 @@ "2022-09-01" ], "y": [ - 998169.8682883774, - 978517.3697772473, - 1059184.1072124462, - 1084144.3282912015, - 910971.1389600794, - 1007681.1598663231, - 1010369.5383078569, - 987943.0709733691, - 1117031.829369674, - 758732.1455078851, - 970815.4700971642, - 993859.458400753, - 1220237.3470690495, - 1392506.8056284683, - 1467397.265600469, - 1446242.5045718001, - 1457974.6215145695, - 1362297.8389132002, - 1068964.4232 + 998097.6373722198, + 1023339.4722018229, + 1004583.3065904576, + 1031019.6217769366, + 809909.7023460879, + 949091.4082848827, + 935672.7535135892, + 950925.9353386193, + 1082048.282080676, + 760624.137775737, + 958583.6574496945, + 1028353.2224276176, + 1252929.3533126737, + 1413438.401572646, + 1504081.908906326, + 1459661.9218582301, + 1514824.4380524335, + 1510132.2849140002, + 1162363.9356803, + 25787.75 ] }, { @@ -1873,25 +1988,26 @@ "2022-09-01" ], "y": [ - 998967.5243363113, - 981656.782363495, - 1002093.5958363946, - 1025279.9978075742, - 852960.5426584905, - 965310.80165797, - 970721.8329451823, - 949365.083501407, - 1019321.6109255173, - 703796.3635158397, - 896874.2848335523, - 975292.9920188024, - 1196701.1319587466, - 1366093.765435214, - 1439036.7272038234, - 1412478.6554720271, - 1478830.9179238824, - 1424229.4466146997, - 1171977.7292 + 998949.8181730722, + 980399.1316273996, + 998854.0858749872, + 1024825.8053336296, + 854417.3357285586, + 980499.5171671169, + 973611.4804992174, + 958861.2789376328, + 1038140.6236854413, + 724188.7089781559, + 916285.3825595068, + 977104.7060194196, + 1187790.756345991, + 1338886.6557853844, + 1424632.4071524558, + 1381438.4756382285, + 1423259.0638506, + 1387696.934248, + 1136045.7860163334, + 111673.29999999999 ] }, { @@ -1920,25 +2036,26 @@ "2022-09-01" ], "y": [ - 998918.9080804393, - 987579.5852179282, - 1009580.0109353173, - 1052959.6538395123, - 875778.4242493137, - 985919.5572947458, - 987431.6535367251, - 969488.7031053505, - 1030064.5954796389, - 717598.9786928936, - 913159.3419426266, - 985032.4748272595, - 1209366.2029215146, - 1380503.506009758, - 1456589.403747596, - 1433425.136001657, - 1500565.2090683258, - 1464990.8531060999, - 1207466.6736333333 + 998895.8909723532, + 992374.6853119333, + 1008796.3304465894, + 1033877.2725289491, + 864068.4192988311, + 995085.7233387289, + 976002.05972606, + 947259.1794897235, + 1032189.032513042, + 715572.3709715783, + 903789.2488963603, + 954037.5797818311, + 1161121.0697542438, + 1309443.9512316308, + 1392765.2173538767, + 1351914.1622707942, + 1391383.3055489, + 1364547.2558339997, + 1131390.3898183997, + 104749.20999999999 ] }, { @@ -1968,24 +2085,92 @@ ], "y": [ 1000000, - 1020825.6331307443, - 1079545.0964922423, - 1024260.889674335, - 1040159.8852320837, - 1020646.0185876773, - 1081470.5534677668, - 1155331.9233889321, - 1082294.0983676286, - 1118247.4336416188, - 1267569.0302473544, - 1314569.2455391786, - 1395215.8640000108, - 1494898.7946529207, - 1601313.9812369465, - 1597393.2966024475, - 1520688.8150717698, - 1444666.1334546707, - 1391251.8476192872 + 1015420.4215210223, + 901577.66175118, + 906222.2329908399, + 900080.8861314631, + 845560.1663892802, + 706118.0063925646, + 721381.9911742795, + 639771.4711544869, + 721064.9863496653, + 752840.1612591005, + 796535.1595781911, + 855691.12879557, + 829154.9188806105, + 817691.3192282643, + 876346.3705444649, + 890131.5083541947, + 884067.531932604, + 898085.6429648924, + 928678.2863560262, + 947787.7721022192, + 943711.196275488, + 961923.0628269009, + 1020945.1660701773, + 1025645.5162149781, + 1114548.451961276, + 1067402.9019714282, + 1072555.6604772124, + 951236.9149655188, + 998616.0367663883, + 907109.4708895985, + 388523.13879740227, + 345648.1248933483, + 421818.49492525955, + 445135.91674057767, + 489623.01719070674, + 492629.09030244197, + 471924.78206550394, + 476109.8398905623, + 525762.8900103732, + 566114.8640599039, + 569654.8713096414, + 517566.6533542639, + 534267.4134848869, + 584622.9066905852, + 540686.108427394, + 590096.5576499363, + 594096.7443247958, + 634018.8170999325, + 677773.4946512762, + 678699.0607562413, + 743659.6056120741, + 765112.0901678336, + 790616.223033034, + 822108.7647782513, + 842154.6111973216, + 862587.2383262035, + 865540.4988961497, + 776049.0756745328, + 845724.9010333614, + 792596.479097556, + 838272.1945614931, + 865893.3362604625, + 873884.070175674, + 948610.3881477184, + 961921.8726763312, + 979979.1449241162, + 1041424.8497400946, + 1055212.3818667624, + 1077187.8478065385, + 1139149.3526021626, + 1080812.773046224, + 1097589.590018005, + 1076998.3163167306, + 1141181.1186434878, + 1219120.5507259427, + 1142050.1334188452, + 1179988.5379693666, + 1337554.5355878528, + 1387149.7447141893, + 1472249.0550697441, + 1577435.7177554606, + 1689726.3402575864, + 1685589.1852858684, + 1604649.6666300267, + 1524429.591684949, + 1468066.175902882 ] } ], @@ -2004,6 +2189,11 @@ "line": { "color": "#E5ECF6", "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 } }, "type": "bar" @@ -2015,6 +2205,11 @@ "line": { "color": "#E5ECF6", "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 } }, "type": "barpolar" @@ -2213,9 +2408,10 @@ "histogram": [ { "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 } }, "type": "histogram" @@ -2351,11 +2547,10 @@ ], "scatter": [ { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 }, "type": "scatter" } @@ -2533,6 +2728,7 @@ "arrowhead": 0, "arrowwidth": 1 }, + "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, @@ -2811,23 +3007,9 @@ } }, "text/html": [ - "
\n", - " \n", - " \n", - "
\n", - " \n", - "
" + " }) }; }); " ] }, "metadata": {}, @@ -2886,7 +3064,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 67, "metadata": {}, "outputs": [ { @@ -2925,24 +3103,25 @@ ], "y": [ 0, - -0.019688531116281282, - 0.08243771641331424, - 0.023565516994439692, - -0.1597325972309175, - 0.1061614542658763, - 0.002667885982794803, - -0.022196301931318264, - 0.1306641669839543, - -0.32076049620177116, - 0.2795233151052448, - 0.0237367337185948, - 0.22777656011099146, - 0.14117700869687494, - 0.05378103695385604, - -0.014416519319335226, - 0.008112136730653618, - -0.06562307820007086, - -0.21532252884377515 + 0.02528994547673663, + -0.01832839064734743, + 0.026315702254901603, + -0.21445752802431756, + 0.17184842401026096, + -0.014138421920331748, + 0.016301833913355053, + 0.13788912665986414, + -0.29705157304706487, + 0.2602593184234761, + 0.07278401257491145, + 0.2183842341203567, + 0.1281070220245025, + 0.06412978961999784, + -0.029532957470644147, + 0.0377912963050913, + -0.0030974897292163554, + -0.23028999029280742, + -0.9778143925422917 ] }, { @@ -2972,24 +3151,25 @@ ], "y": [ 0, - -0.011351594980118283, - 0.02227711674754221, - 0.04296800890897818, - -0.1682697232929343, - 0.12576369775247803, - 0.0015336912943773044, - -0.01817133405345837, - 0.06248230864398826, - -0.30334565245517336, - 0.2725204035350582, - 0.07870820522049553, - 0.2277424692354386, - 0.14150991045956143, - 0.05511459942449454, - -0.015903086817974053, - 0.04683891148577655, - -0.023707304252584542, - -0.17578552038516773 + -0.006528413740967504, + 0.01654782752695298, + 0.024862245554815286, + -0.16424469106932924, + 0.1516283908931828, + -0.01917790916408591, + -0.029449610223572602, + 0.08965851676314089, + -0.30674290422424455, + 0.26302982837253475, + 0.05559739833908219, + 0.21706009738082693, + 0.12774109896117902, + 0.06363102906686155, + -0.02933089839843624, + 0.029195006886983142, + -0.019287316160742286, + -0.17086756432857764, + -0.9074155031343218 ] }, { @@ -3019,24 +3199,25 @@ ], "y": [ 0, - -0.017328633365050694, - 0.020818695332287855, - 0.023137960433553293, - -0.16807062999138395, - 0.1317180026280095, - 0.005605480926887663, - -0.02200089533268119, - 0.0736876978518103, - -0.30954435187849, - 0.27433776490856676, - 0.08743556205293987, - 0.227017052056984, - 0.14154965592721336, - 0.05339528194491913, - -0.01845545094835832, - 0.046975762922011305, - -0.036922051498515615, - -0.177114521830935 + -0.01857018862028424, + 0.0188239194142834, + 0.026001514961909, + -0.16628042416398262, + 0.147565102165265, + -0.007025028107918447, + -0.015149987296802895, + 0.08268072398923629, + -0.30241752181197135, + 0.26525775837129945, + 0.06637596170095289, + 0.21562279766809775, + 0.1272075057261859, + 0.06404257671593072, + -0.030319352063991716, + 0.030273218062100333, + -0.024986406555098595, + -0.181344457871875, + -0.9017000006737453 ] }, { @@ -3066,6 +3247,74 @@ ], "y": [ 0, + 0.015420421521022359, + -0.11211391592786224, + 0.005151604167563888, + -0.006776866242961527, + -0.06057313357304172, + -0.16491098509543442, + 0.021616761849334407, + -0.1131307975777791, + 0.1270664899272264, + 0.04406700576364758, + 0.0580402063646712, + 0.07426661397935691, + -0.031011435110132206, + -0.013825642701152312, + 0.0717325107126723, + 0.015730238947832015, + -0.006812450030897837, + 0.01585638033968322, + 0.03406428287856472, + 0.020577078227138664, + -0.004301148365407913, + 0.019298135513586202, + 0.06135844489456598, + 0.004603920270168108, + 0.0866799828408391, + -0.04230013500703855, + 0.004827379142652915, + -0.11311184116795885, + 0.04980790910809731, + -0.09163338310998546, + -0.5716910127546356, + -0.11035382355029158, + 0.22036968971092805, + 0.055278329650883605, + 0.09994048733671584, + 0.006139566577125166, + -0.042028188437323, + 0.008868061148942749, + 0.10428906516870991, + 0.07674937660346187, + 0.006253160753191119, + -0.0914382033381479, + 0.032267844194343066, + 0.09425147769586494, + -0.0751540826751167, + 0.09138472110233163, + 0.006778868005585903, + 0.06719793225008994, + 0.06901163872624808, + 0.0013655979649091856, + 0.09571332658608728, + 0.02884718276193436, + 0.03333385159239327, + 0.039832906064591965, + 0.02438344812499034, + 0.024262322924091233, + 0.003423723930435997, + -0.1033936867607559, + 0.08978275671324937, + -0.06281998067088922, + 0.05762795655608152, + 0.032950086950478095, + 0.009228312057142194, + 0.08551056200969813, + 0.014032615175767864, + 0.01877207781703161, + 0.06270103311304294, + 0.013239104223515227, 0.020825633130744352, 0.057521540854546034, -0.051210650669010274, @@ -3102,6 +3351,11 @@ "line": { "color": "#E5ECF6", "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 } }, "type": "bar" @@ -3113,6 +3367,11 @@ "line": { "color": "#E5ECF6", "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 } }, "type": "barpolar" @@ -3311,9 +3570,10 @@ "histogram": [ { "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 } }, "type": "histogram" @@ -3449,11 +3709,10 @@ ], "scatter": [ { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 }, "type": "scatter" } @@ -3631,6 +3890,7 @@ "arrowhead": 0, "arrowwidth": 1 }, + "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, @@ -3909,23 +4169,9 @@ } }, "text/html": [ - "
\n", - " \n", - " \n", - "
\n", - " \n", - "
" + " }) }; }); " ] }, "metadata": {}, @@ -3981,7 +4223,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 68, "metadata": {}, "outputs": [ { @@ -4020,24 +4262,25 @@ ], "y": [ 0, - -0.019688531116281282, - 0.06274918529703295, - 0.08631470229147264, - -0.07341789493944487, - 0.03274355932643143, - 0.035411445309226235, - 0.013215143377907971, - 0.14387931036186227, - -0.1768811858399089, - 0.10264212926533592, - 0.12637886298393072, - 0.3541554230949222, - 0.49533243179179715, - 0.5491134687456531, - 0.534696949426318, - 0.5428090861569715, - 0.4771860079569007, - 0.2618634791131256 + 0.02528994547673663, + 0.006961554829389201, + 0.0332772570842908, + -0.18118027094002676, + -0.009331846929765797, + -0.023470268850097543, + -0.0071684349367424904, + 0.13072069172312165, + -0.16633088132394322, + 0.09392843709953289, + 0.16671244967444432, + 0.385096683794801, + 0.5132037058193035, + 0.5773334954393013, + 0.5478005379686572, + 0.5855918342737485, + 0.5824943445445322, + 0.3522043542517248, + -0.6256100382905669 ] }, { @@ -4067,24 +4310,25 @@ ], "y": [ 0, - -0.011351594980118283, - 0.010925521767423925, - 0.0538935306764021, - -0.1143761926165322, - 0.01138750513594583, - 0.012921196430323134, - -0.005250137623135236, - 0.057232171020853026, - -0.24611348143432032, - 0.026406922100737862, - 0.10511512732123339, - 0.33285759655667196, - 0.4743675070162334, - 0.529482106440728, - 0.513579019622754, - 0.5604179311085306, - 0.5367106268559461, - 0.36092510647077836 + -0.006528413740967504, + 0.010019413785985476, + 0.03488165934080076, + -0.12936303172852848, + 0.02226535916465433, + 0.003087450000568419, + -0.026362160223004184, + 0.06329635654013671, + -0.24344654768410784, + 0.019583280688426907, + 0.0751806790275091, + 0.292240776408336, + 0.41998187536951503, + 0.4836129044363766, + 0.4542820060379404, + 0.4834770129249235, + 0.4641896967641812, + 0.29332213243560357, + -0.6140933706987183 ] }, { @@ -4114,24 +4358,25 @@ ], "y": [ 0, - -0.017328633365050694, - 0.003490061967237161, - 0.026628022400790454, - -0.1414426075905935, - -0.009724604962583983, - -0.004119124035696319, - -0.02612001936837751, - 0.04756767848343279, - -0.2619766733950572, - 0.012361091513509559, - 0.09979665356644943, - 0.3268137056234334, - 0.4683633615506468, - 0.5217586434955659, - 0.5033031925472076, - 0.5502789554692189, - 0.5133569039707033, - 0.3362423821397683 + -0.01857018862028424, + 0.0002537307939991622, + 0.026255245755908162, + -0.14002517840807446, + 0.007539923757190536, + 0.0005148956492720897, + -0.014635091647530806, + 0.06804563234170548, + -0.23437188947026588, + 0.030885868901033564, + 0.09726183060198645, + 0.31288462827008423, + 0.4400921339962701, + 0.5041347107122008, + 0.4738153586482091, + 0.5040885767103095, + 0.4791021701552109, + 0.2977577122833359, + -0.6039422883904094 ] }, { @@ -4161,24 +4406,92 @@ ], "y": [ 0, - 0.020825633130744352, - 0.07834717398529038, - 0.02713652331628011, - 0.042658931442986735, - 0.023898482283718794, - 0.0834926351754879, - 0.15178979748316862, - 0.08857175291718634, - 0.12179131433604357, - 0.2553231178649492, - 0.29240213618291233, - 0.3537504590152744, - 0.42519670129575154, - 0.4963822470762823, - 0.4939338299136029, - 0.4459152976624514, - 0.3959230304322153, - 0.3589495840431508 + 0.015420421521022359, + -0.09669349440683989, + -0.091541890239276, + -0.09831875648223753, + -0.15889189005527926, + -0.3238028751507137, + -0.3021861133013793, + -0.4153169108791584, + -0.288250420951932, + -0.2441834151882844, + -0.1861432088236132, + -0.1118765948442563, + -0.1428880299543885, + -0.15671367265554081, + -0.08498116194286852, + -0.0692509229950365, + -0.07606337302593434, + -0.060206992686251126, + -0.026142709807686404, + -0.00556563158054774, + -0.009866779945955653, + 0.009431355567630549, + 0.07078980046219653, + 0.07539372073236464, + 0.16207370357320372, + 0.11977356856616517, + 0.12460094770881808, + 0.011489106540859231, + 0.061297015648956545, + -0.03033636746102892, + -0.6020273802156646, + -0.7123812037659562, + -0.4920115140550281, + -0.4367331844041445, + -0.33679269706742865, + -0.3306531304903035, + -0.37268131892762646, + -0.36381325777868373, + -0.25952419260997384, + -0.18277481600651196, + -0.17652165525332084, + -0.26795985859146876, + -0.2356920143971257, + -0.14144053670126078, + -0.21659461937637747, + -0.12520989827404583, + -0.11843103026845993, + -0.05123309801836999, + 0.017778540707878085, + 0.019144138672787272, + 0.11485746525887455, + 0.1437046480208089, + 0.17703849961320217, + 0.21687140567779414, + 0.2412548538027845, + 0.26551717672687575, + 0.26894090065731174, + 0.16554721389655586, + 0.2553299706098052, + 0.192509989938916, + 0.25013794649499754, + 0.2830880334454756, + 0.2923163455026178, + 0.37782690751231596, + 0.39185952268808383, + 0.41063160050511543, + 0.47333263361815836, + 0.48657173784167357, + 0.5073973709724179, + 0.564918911826964, + 0.5137082611579536, + 0.5292306692846602, + 0.5104702201253923, + 0.5700643730171614, + 0.6383615353248421, + 0.5751434907588598, + 0.608363052177717, + 0.7418948557066227, + 0.7789738740245858, + 0.8403221968569479, + 0.911768439137425, + 0.9829539849179558, + 0.9805055677552764, + 0.9324870355041249, + 0.8824947682738887, + 0.8455213218848242 ] } ], @@ -4197,6 +4510,11 @@ "line": { "color": "#E5ECF6", "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 } }, "type": "bar" @@ -4208,6 +4526,11 @@ "line": { "color": "#E5ECF6", "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 } }, "type": "barpolar" @@ -4406,9 +4729,10 @@ "histogram": [ { "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 } }, "type": "histogram" @@ -4544,11 +4868,10 @@ ], "scatter": [ { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 }, "type": "scatter" } @@ -4726,6 +5049,7 @@ "arrowhead": 0, "arrowwidth": 1 }, + "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, @@ -5004,23 +5328,9 @@ } }, "text/html": [ - "
\n", - " \n", - " \n", - "
\n", - " \n", - "
" + " }) }; }); " ] }, "metadata": {}, @@ -5076,7 +5382,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 69, "metadata": {}, "outputs": [], "source": [ @@ -5094,7 +5400,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 70, "metadata": {}, "outputs": [ { @@ -5134,31 +5440,31 @@ " \n", " \n", " 2018-06-01\n", - " -0.019689\n", - " -0.017329\n", - " -0.011352\n", - " 0.020826\n", + " 0.02529\n", + " -0.01857\n", + " -0.006528\n", + " 0.015420\n", " \n", " \n", " 2018-09-01\n", - " 0.082438\n", - " 0.020819\n", - " 0.022277\n", - " 0.057522\n", + " -0.018328\n", + " 0.018824\n", + " 0.016548\n", + " -0.112114\n", " \n", " \n", " 2018-12-01\n", - " 0.023566\n", - " 0.023138\n", - " 0.042968\n", - " -0.051211\n", + " 0.026316\n", + " 0.026002\n", + " 0.024862\n", + " 0.005152\n", " \n", " \n", " 2019-03-01\n", - " -0.159733\n", - " -0.168071\n", - " -0.16827\n", - " 0.015522\n", + " -0.214458\n", + " -0.16628\n", + " -0.164245\n", + " -0.006777\n", " \n", " \n", "\n", @@ -5167,13 +5473,13 @@ "text/plain": [ " Mean-Var Equally Min-Var SPX\n", "2018-03-01 0 0 0 0.000000\n", - "2018-06-01 -0.019689 -0.017329 -0.011352 0.020826\n", - "2018-09-01 0.082438 0.020819 0.022277 0.057522\n", - "2018-12-01 0.023566 0.023138 0.042968 -0.051211\n", - "2019-03-01 -0.159733 -0.168071 -0.16827 0.015522" + "2018-06-01 0.02529 -0.01857 -0.006528 0.015420\n", + "2018-09-01 -0.018328 0.018824 0.016548 -0.112114\n", + "2018-12-01 0.026316 0.026002 0.024862 0.005152\n", + "2019-03-01 -0.214458 -0.16628 -0.164245 -0.006777" ] }, - "execution_count": 41, + "execution_count": 70, "metadata": {}, "output_type": "execute_result" } @@ -5184,7 +5490,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 71, "metadata": {}, "outputs": [], "source": [ @@ -5200,17 +5506,17 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 72, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "SPX 0.07556833348276859 0.1083325973664983\n", - "Mean-Var 0.05512915349750012 0.2850812319781435\n", - "Equally 0.07078786992416175 0.2729629255366962\n", - "Min-Var 0.07598423294121649 0.26886702213764985\n" + "SPX -0.00522854196153727 0.13913436271090593\n", + "Mean-Var -0.12512200765811338 0.5267684626050293\n", + "Equally -0.12078845767808188 0.4858170592420743\n", + "Min-Var -0.12281867413974365 0.4877126592890122\n" ] } ], @@ -5223,8 +5529,10 @@ }, { "cell_type": "code", - "execution_count": 44, - "metadata": {}, + "execution_count": 75, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "risk_free=0.015" @@ -5232,7 +5540,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 76, "metadata": {}, "outputs": [], "source": [ @@ -5244,17 +5552,17 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 77, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "SPX Sharpe 0.5590961073134877\n", - "Mean-Var Sharpe 0.14076392619411973\n", - "Equally Sharpe 0.2043789273377414\n", - "Min-Var Sharpe 0.22681931185296073\n" + "SPX Sharpe -0.1453885407415006\n", + "Mean-Var Sharpe -0.2660030309429834\n", + "Equally Sharpe -0.27950533044254594\n", + "Min-Var Sharpe -0.2825817036216689\n" ] } ], @@ -5268,28 +5576,101 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [] }, { "cell_type": "code", - "execution_count": 47, - "metadata": {}, + "execution_count": 78, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ - "insample_date=tradedate[1:51]" + "insample_date=tradedate" ] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 79, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['2018-06-01',\n", + "['2001-03-01',\n", + " '2001-06-01',\n", + " '2001-09-01',\n", + " '2001-12-01',\n", + " '2002-03-01',\n", + " '2002-06-01',\n", + " '2002-09-01',\n", + " '2002-12-01',\n", + " '2003-03-01',\n", + " '2003-06-01',\n", + " '2003-09-01',\n", + " '2003-12-01',\n", + " '2004-03-01',\n", + " '2004-06-01',\n", + " '2004-09-01',\n", + " '2004-12-01',\n", + " '2005-03-01',\n", + " '2005-06-01',\n", + " '2005-09-01',\n", + " '2005-12-01',\n", + " '2006-03-01',\n", + " '2006-06-01',\n", + " '2006-09-01',\n", + " '2006-12-01',\n", + " '2007-03-01',\n", + " '2007-06-01',\n", + " '2007-09-01',\n", + " '2007-12-01',\n", + " '2008-03-01',\n", + " '2008-06-01',\n", + " '2008-09-01',\n", + " '2008-12-01',\n", + " '2009-03-01',\n", + " '2009-06-01',\n", + " '2009-09-01',\n", + " '2009-12-01',\n", + " '2010-03-01',\n", + " '2010-06-01',\n", + " '2010-09-01',\n", + " '2010-12-01',\n", + " '2011-03-01',\n", + " '2011-06-01',\n", + " '2011-09-01',\n", + " '2011-12-01',\n", + " '2012-03-01',\n", + " '2012-06-01',\n", + " '2012-09-01',\n", + " '2012-12-01',\n", + " '2013-03-01',\n", + " '2013-06-01',\n", + " '2013-09-01',\n", + " '2013-12-01',\n", + " '2014-03-01',\n", + " '2014-06-01',\n", + " '2014-09-01',\n", + " '2014-12-01',\n", + " '2015-03-01',\n", + " '2015-06-01',\n", + " '2015-09-01',\n", + " '2015-12-01',\n", + " '2016-03-01',\n", + " '2016-06-01',\n", + " '2016-09-01',\n", + " '2016-12-01',\n", + " '2017-03-01',\n", + " '2017-06-01',\n", + " '2017-09-01',\n", + " '2017-12-01',\n", + " '2018-03-01',\n", + " '2018-06-01',\n", " '2018-09-01',\n", " '2018-12-01',\n", " '2019-03-01',\n", @@ -5309,7 +5690,7 @@ " '2022-09-01']" ] }, - "execution_count": 48, + "execution_count": 79, "metadata": {}, "output_type": "execute_result" } @@ -5320,36 +5701,27 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 80, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "2018-06-01 0.020826\n", - "2018-09-01 0.057522\n", - "2018-12-01 -0.051211\n", - "2019-03-01 0.015522\n", - "2019-06-01 -0.018760\n", - "2019-09-01 0.059594\n", - "2019-12-01 0.068297\n", - "2020-03-01 -0.063218\n", - "2020-06-01 0.033220\n", - "2020-09-01 0.133532\n", - "2020-12-01 0.037079\n", - "2021-03-01 0.061348\n", - "2021-06-01 0.071446\n", - "2021-09-01 0.071186\n", - "2021-12-01 -0.002448\n", - "2022-03-01 -0.048019\n", - "2022-06-01 -0.049992\n", - "2022-09-01 -0.036973\n", - "Name: SPX, dtype: float64" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" + "ename": "KeyError", + "evalue": "\"['2001-03-01', '2001-06-01', '2001-09-01', '2001-12-01', '2002-03-01', '2002-06-01', '2002-09-01', '2002-12-01', '2003-03-01', '2003-06-01', '2003-09-01', '2003-12-01', '2004-03-01', '2004-06-01', '2004-09-01', '2004-12-01', '2005-03-01', '2005-06-01', '2005-09-01', '2005-12-01', '2006-03-01', '2006-06-01', '2006-09-01', '2006-12-01', '2007-03-01', '2007-06-01', '2007-09-01', '2007-12-01', '2008-03-01', '2008-06-01', '2008-09-01', '2008-12-01', '2009-03-01', '2009-06-01', '2009-09-01', '2009-12-01', '2010-03-01', '2010-06-01', '2010-09-01', '2010-12-01', '2011-03-01', '2011-06-01', '2011-09-01', '2011-12-01', '2012-03-01', '2012-06-01', '2012-09-01', '2012-12-01', '2013-03-01', '2013-06-01', '2013-09-01', '2013-12-01', '2014-03-01', '2014-06-01', '2014-09-01', '2014-12-01', '2015-03-01', '2015-06-01', '2015-09-01', '2015-12-01', '2016-03-01', '2016-06-01', '2016-09-01', '2016-12-01', '2017-03-01', '2017-06-01', '2017-09-01', '2017-12-01'] not in index\"", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/home/wenbiaolin/20221117/fundamental_back_testing.ipynb Cell 65\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m quarter_return[\u001b[39m'\u001b[39;49m\u001b[39mSPX\u001b[39;49m\u001b[39m'\u001b[39;49m][insample_date]\n", + "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/series.py:984\u001b[0m, in \u001b[0;36mSeries.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 981\u001b[0m key \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39masarray(key, dtype\u001b[39m=\u001b[39m\u001b[39mbool\u001b[39m)\n\u001b[1;32m 982\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_values(key)\n\u001b[0;32m--> 984\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_get_with(key)\n", + "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/series.py:1024\u001b[0m, in \u001b[0;36mSeries._get_with\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1021\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39miloc[key]\n\u001b[1;32m 1023\u001b[0m \u001b[39m# handle the dup indexing case GH#4246\u001b[39;00m\n\u001b[0;32m-> 1024\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mloc[key]\n", + "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/indexing.py:967\u001b[0m, in \u001b[0;36m_LocationIndexer.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 964\u001b[0m axis \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39maxis \u001b[39mor\u001b[39;00m \u001b[39m0\u001b[39m\n\u001b[1;32m 966\u001b[0m maybe_callable \u001b[39m=\u001b[39m com\u001b[39m.\u001b[39mapply_if_callable(key, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj)\n\u001b[0;32m--> 967\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_getitem_axis(maybe_callable, axis\u001b[39m=\u001b[39;49maxis)\n", + "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/indexing.py:1191\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1188\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(key, \u001b[39m\"\u001b[39m\u001b[39mndim\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mand\u001b[39;00m key\u001b[39m.\u001b[39mndim \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m 1189\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mCannot index with multidimensional key\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m-> 1191\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_getitem_iterable(key, axis\u001b[39m=\u001b[39;49maxis)\n\u001b[1;32m 1193\u001b[0m \u001b[39m# nested tuple slicing\u001b[39;00m\n\u001b[1;32m 1194\u001b[0m \u001b[39mif\u001b[39;00m is_nested_tuple(key, labels):\n", + "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/indexing.py:1132\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_iterable\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1129\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_validate_key(key, axis)\n\u001b[1;32m 1131\u001b[0m \u001b[39m# A collection of keys\u001b[39;00m\n\u001b[0;32m-> 1132\u001b[0m keyarr, indexer \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_get_listlike_indexer(key, axis)\n\u001b[1;32m 1133\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj\u001b[39m.\u001b[39m_reindex_with_indexers(\n\u001b[1;32m 1134\u001b[0m {axis: [keyarr, indexer]}, copy\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m, allow_dups\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m\n\u001b[1;32m 1135\u001b[0m )\n", + "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/indexing.py:1327\u001b[0m, in \u001b[0;36m_LocIndexer._get_listlike_indexer\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1324\u001b[0m ax \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj\u001b[39m.\u001b[39m_get_axis(axis)\n\u001b[1;32m 1325\u001b[0m axis_name \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj\u001b[39m.\u001b[39m_get_axis_name(axis)\n\u001b[0;32m-> 1327\u001b[0m keyarr, indexer \u001b[39m=\u001b[39m ax\u001b[39m.\u001b[39;49m_get_indexer_strict(key, axis_name)\n\u001b[1;32m 1329\u001b[0m \u001b[39mreturn\u001b[39;00m keyarr, indexer\n", + "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py:5782\u001b[0m, in \u001b[0;36mIndex._get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m 5779\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 5780\u001b[0m keyarr, indexer, new_indexer \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_reindex_non_unique(keyarr)\n\u001b[0;32m-> 5782\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_raise_if_missing(keyarr, indexer, axis_name)\n\u001b[1;32m 5784\u001b[0m keyarr \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtake(indexer)\n\u001b[1;32m 5785\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(key, Index):\n\u001b[1;32m 5786\u001b[0m \u001b[39m# GH 42790 - Preserve name from an Index\u001b[39;00m\n", + "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py:5845\u001b[0m, in \u001b[0;36mIndex._raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m 5842\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mNone of [\u001b[39m\u001b[39m{\u001b[39;00mkey\u001b[39m}\u001b[39;00m\u001b[39m] are in the [\u001b[39m\u001b[39m{\u001b[39;00maxis_name\u001b[39m}\u001b[39;00m\u001b[39m]\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 5844\u001b[0m not_found \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(ensure_index(key)[missing_mask\u001b[39m.\u001b[39mnonzero()[\u001b[39m0\u001b[39m]]\u001b[39m.\u001b[39munique())\n\u001b[0;32m-> 5845\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mnot_found\u001b[39m}\u001b[39;00m\u001b[39m not in index\u001b[39m\u001b[39m\"\u001b[39m)\n", + "\u001b[0;31mKeyError\u001b[0m: \"['2001-03-01', '2001-06-01', '2001-09-01', '2001-12-01', '2002-03-01', '2002-06-01', '2002-09-01', '2002-12-01', '2003-03-01', '2003-06-01', '2003-09-01', '2003-12-01', '2004-03-01', '2004-06-01', '2004-09-01', '2004-12-01', '2005-03-01', '2005-06-01', '2005-09-01', '2005-12-01', '2006-03-01', '2006-06-01', '2006-09-01', '2006-12-01', '2007-03-01', '2007-06-01', '2007-09-01', '2007-12-01', '2008-03-01', '2008-06-01', '2008-09-01', '2008-12-01', '2009-03-01', '2009-06-01', '2009-09-01', '2009-12-01', '2010-03-01', '2010-06-01', '2010-09-01', '2010-12-01', '2011-03-01', '2011-06-01', '2011-09-01', '2011-12-01', '2012-03-01', '2012-06-01', '2012-09-01', '2012-12-01', '2013-03-01', '2013-06-01', '2013-09-01', '2013-12-01', '2014-03-01', '2014-06-01', '2014-09-01', '2014-12-01', '2015-03-01', '2015-06-01', '2015-09-01', '2015-12-01', '2016-03-01', '2016-06-01', '2016-09-01', '2016-12-01', '2017-03-01', '2017-06-01', '2017-09-01', '2017-12-01'] not in index\"" + ] } ], "source": [ @@ -5358,21 +5730,21 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "SPX 0.0797665742318113 0.11107500724034111\n", - "Mean-Var 0.05819188424736124 0.2932657439273469\n", - "Equally 0.07472052936439295 0.28073803260205243\n", - "Min-Var 0.08020557921572852 0.2764998639318288\n", - "SPX Sharpe 0.5830886338964726\n", - "Mean-Var Sharpe 0.14727899572908018\n", - "Equally Sharpe 0.21272689279349302\n", - "Min-Var Sharpe 0.23582499567451864\n" + "SPX 0.001531531093822985 0.21955274735546973\n", + "Mean-Var 0.2565791830967157 0.29155309241664823\n", + "Equally 0.26616405706243595 0.3011567507349648\n", + "Min-Var 0.26844700713719916 0.29823334992044453\n", + "SPX Sharpe -0.061345025595925314\n", + "Mean-Var Sharpe 0.828594137329483\n", + "Equally Sharpe 0.8339977651155983\n", + "Min-Var Sharpe 0.8498278519314074\n" ] } ], @@ -5402,21 +5774,16 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.8.10 64-bit", "language": "python", "name": "python3" }, @@ -5430,7 +5797,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.7" + "version": "3.8.10" }, "vscode": { "interpreter": { diff --git a/ml_model.py b/ml_model.py index 50f364033..dd9e467b9 100644 --- a/ml_model.py +++ b/ml_model.py @@ -18,14 +18,18 @@ from sklearn.model_selection import TimeSeriesSplit, GridSearchCV,RandomizedSearchCV -from keras.models import Sequential -from keras.layers import Dense -from keras.layers import LSTM -from keras.layers import Dropout + +from xgboost import XGBRegressor +from lightgbm import LGBMRegressor +import time import os import errno +from multiprocessing import cpu_count + +n_cpus = cpu_count() - 1 + def prepare_rolling_train(df,features_column,label_column,date_column,unique_datetime,testing_windows,first_trade_date_index, max_rolling_window_index,current_index): if current_index <=max_rolling_window_index: @@ -117,9 +121,12 @@ def train_random_forest(X_train, y_train): # scoring_method = 'neg_mean_absolute_error' scoring_method = 'neg_mean_squared_error' #scoring_method = 'neg_mean_squared_log_error' - + n_models = 1 + for key, val in random_grid.items(): + n_models *= len(val) + n_jobs_per_model = min(max(1, n_cpus//n_models), n_cpus) # my_cv_rf = TimeSeriesSplit(n_splits=5).split(X_train_rf) - rf = RandomForestRegressor(random_state=42) + rf = RandomForestRegressor(random_state=42, n_jobs= n_jobs_per_model) #RandomizedSearchCV #randomforest_regressor = RandomizedSearchCV(estimator=rf, # param_distributions=random_grid, @@ -132,7 +139,7 @@ def train_random_forest(X_train, y_train): randomforest_regressor = GridSearchCV(estimator=rf, param_grid=random_grid, cv=3, - n_jobs=-1, + n_jobs=n_cpus // n_jobs_per_model, scoring=scoring_method, verbose=0) @@ -171,18 +178,24 @@ def train_svm(X_train, y_train): return model -def train_gbm(X_train, y_train): - gbm = GradientBoostingRegressor(random_state = 42) +def train_lightgbm(X_train, y_train): + + # model = gbm.fit(X_train, y_train) param_grid_gbm = {'learning_rate': [0.1, 0.01, 0.001], 'n_estimators': [100, 250, 500,1000]} + n_models = 1 + for key, val in param_grid_gbm.items(): + n_models *= len(val) + n_jobs_per_model = min(max(1, n_cpus//n_models), n_cpus) + lightgbm = LGBMRegressor(random_state = 42, n_jobs=n_jobs_per_model) # scoring_method = 'r2' # scoring_method = 'explained_variance' # scoring_method = 'neg_mean_absolute_error' scoring_method = 'neg_mean_squared_error' #scoring_method = 'neg_mean_squared_log_error' - gbm_regressor = GridSearchCV(estimator=gbm, param_grid=param_grid_gbm, - cv=3, n_jobs=-1, scoring=scoring_method, verbose=0) + gbm_regressor = GridSearchCV(estimator=lightgbm, param_grid=param_grid_gbm, + cv=3, n_jobs=n_cpus // n_jobs_per_model, scoring=scoring_method, verbose=0) gbm_regressor.fit(X_train, y_train) model = gbm_regressor.best_estimator_ @@ -195,7 +208,31 @@ def train_gbm(X_train, y_train): +def train_xgb(X_train, y_train): + xgb = XGBRegressor(random_state = 42, n_jobs=10) + + param_grid_gbm = {'learning_rate': [0.1, 0.01, 0.001], 'n_estimators': [100, 250, 500,1000]} + n_models = 1 + for key, val in param_grid_gbm.items(): + n_models *= len(val) + n_jobs_per_model = min(max(1, n_cpus//n_models), n_cpus) + xgb = XGBRegressor(random_state = 42, n_jobs=n_jobs_per_model) + # scoring_method = 'r2' + # scoring_method = 'explained_variance' + # scoring_method = 'neg_mean_absolute_error' + scoring_method = 'neg_mean_squared_error' + #scoring_method = 'neg_mean_squared_log_error' + xgb_regressor = GridSearchCV(estimator=xgb, param_grid=param_grid_gbm, + cv=3, n_jobs=n_cpus // n_jobs_per_model, scoring=scoring_method, verbose=0) + xgb_regressor.fit(X_train, y_train) + model = xgb_regressor.best_estimator_ + ''' + + gbm_regressor = GradientBoostingRegressor() + model = gbm_regressor.fit(X_train, y_train) + ''' + return model def train_ada(X_train, y_train): ada = AdaBoostRegressor() @@ -255,18 +292,17 @@ def run_4model(df,features_column, label_column,date_column,tic_column, max_rolling_window_index=44): ## initialize all the result tables ## need date as index and unique tic name as columns - df_predict_lr = pd.DataFrame(columns=unique_ticker, index=trade_date) df_predict_rf = pd.DataFrame(columns=unique_ticker, index=trade_date) - df_predict_ridge = pd.DataFrame(columns=unique_ticker, index=trade_date) df_predict_gbm = pd.DataFrame(columns=unique_ticker, index=trade_date) - + df_predict_xgb = pd.DataFrame(columns=unique_ticker, index=trade_date) df_predict_best = pd.DataFrame(columns=unique_ticker, index=trade_date) df_best_model_name = pd.DataFrame(columns=['model_name'], index=trade_date) evaluation_record = {} # first trade date is 1995-06-01 # fist_trade_date_index = 20 # testing_windows = 6 - + import re + df = df.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x)) for i in range(first_trade_date_index, len(unique_datetime)): try: # prepare training data @@ -303,33 +339,38 @@ def run_4model(df,features_column, label_column,date_column,tic_column, current_index=i) # Training - lr_model = train_linear_regression(X_train, y_train) + # lr_model = train_linear_regression(X_train, y_train) + + t = time.perf_counter() + xgb_model = train_xgb(X_train, y_train) + print(f"xgb:{time.perf_counter() - t}s") + t = time.perf_counter() + gbm_model = train_lightgbm(X_train, y_train) + print(f"gbm:{time.perf_counter() - t}s") + t =time.perf_counter() rf_model = train_random_forest(X_train, y_train) - ridge_model = train_ridge(X_train, y_train) - gbm_model = train_gbm(X_train, y_train) + print(f"rf:{time.perf_counter() - t}s") + # ridge_model = train_ridge(X_train, y_train) + # Validation - lr_eval = evaluate_model(lr_model, X_test, y_test) rf_eval = evaluate_model(rf_model, X_test, y_test) - ridge_eval = evaluate_model(ridge_model, X_test, y_test) - gbm_eval = evaluate_model(gbm_model, X_test, y_test) - + xgb_eval = evaluate_model(xgb_model, X_test, y_test) + gbm_eval = evaluate_model(gbm_model, X_test ,y_test) # Trading - y_trade_lr = lr_model.predict(X_trade) + y_trade_rf = rf_model.predict(X_trade) - y_trade_ridge = ridge_model.predict(X_trade) - y_trade_gbm = gbm_model.predict(X_trade) - - + y_trade_xgb = xgb_model.predict(X_trade) + y_trade_gbm = gbm_model.predict(X_trade) # Decide the best model - eval_data = [[lr_eval, y_trade_lr], + eval_data = [ [rf_eval, y_trade_rf] , - [ridge_eval, y_trade_ridge], + [xgb_eval, y_trade_xgb], [gbm_eval, y_trade_gbm] ] eval_table = pd.DataFrame(eval_data, columns=['model_eval', 'model_predict_return'], - index=['lr', 'rf','ridge','gbm']) + index=['rf', 'xgb', 'gbm']) evaluation_record[unique_datetime[i]]=eval_table @@ -345,11 +386,9 @@ def run_4model(df,features_column, label_column,date_column,tic_column, df_best_model_name.loc[unique_datetime[i]] = best_model_name # Prepare Predicted Return table - append_return_table(df_predict_lr, unique_datetime, y_trade_lr, trade_tic, current_index=i) append_return_table(df_predict_rf, unique_datetime, y_trade_rf, trade_tic, current_index=i) - append_return_table(df_predict_ridge, unique_datetime, y_trade_ridge, trade_tic, current_index=i) + append_return_table(df_predict_xgb, unique_datetime, y_trade_xgb, trade_tic, current_index=i) append_return_table(df_predict_gbm, unique_datetime, y_trade_gbm, trade_tic, current_index=i) - append_return_table(df_predict_best, unique_datetime, y_trade_best, trade_tic, current_index=i) print('Trade Date: ', unique_datetime[i]) @@ -357,10 +396,10 @@ def run_4model(df,features_column, label_column,date_column,tic_column, except Exception: traceback.print_exc() df_evaluation = get_model_evaluation_table(evaluation_record,trade_date) - return (df_predict_lr, + return ( df_predict_rf, - df_predict_ridge, df_predict_gbm, + df_predict_xgb, df_predict_best, df_best_model_name, evaluation_record, @@ -374,20 +413,19 @@ def get_model_evaluation_table(evaluation_record,trade_date): evaluation_list.append(evaluation_record[d]['model_eval'].values) except: print('error') - df_evaluation = pd.DataFrame(evaluation_list,columns = ['linear_regression', 'random_forest','ridge','gbm']) + df_evaluation = pd.DataFrame(evaluation_list,columns = ['rf', 'xgb', 'gbm']) df_evaluation.index = trade_date return df_evaluation def save_model_result(sector_result,sector_name): - df_predict_lr = sector_result[0].astype(np.float64) - df_predict_rf = sector_result[1].astype(np.float64) - df_predict_ridge = sector_result[2].astype(np.float64) - df_predict_gbm = sector_result[3].astype(np.float64) - df_predict_best = sector_result[4].astype(np.float64) - - df_best_model_name = sector_result[5] - df_evaluation_score = sector_result[6] - df_model_score = sector_result[7] + df_predict_rf = sector_result[0].astype(np.float64) + df_predict_gbm = sector_result[1].astype(np.float64) + df_predict_xgb = sector_result[2].astype(np.float64) + df_predict_best = sector_result[3].astype(np.float64) + + df_best_model_name = sector_result[4] + df_evaluation_score = sector_result[5] + df_model_score = sector_result[6] @@ -400,10 +438,9 @@ def save_model_result(sector_result,sector_name): raise - df_predict_lr.to_csv('results/'+sector_name+'/df_predict_lr.csv') df_predict_rf.to_csv('results/'+sector_name+'/df_predict_rf.csv') - df_predict_ridge.to_csv('results/'+sector_name+'/df_predict_ridge.csv') df_predict_gbm.to_csv('results/'+sector_name+'/df_predict_gbm.csv') + df_predict_xgb.to_csv('results/'+sector_name+'/df_predict_xgb.csv') df_predict_best.to_csv('results/'+sector_name+'/df_predict_best.csv') df_best_model_name.to_csv('results/'+sector_name+'/df_best_model_name.csv') #df_evaluation_score.to_csv('results/'+sector_name+'/df_evaluation_score.csv')