diff --git a/fundamental_back_testing.ipynb b/fundamental_back_testing.ipynb
index c7e531fc3..72cc597f6 100644
--- a/fundamental_back_testing.ipynb
+++ b/fundamental_back_testing.ipynb
@@ -2,8 +2,10 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
- "metadata": {},
+ "execution_count": 14,
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [],
"source": [
"import numpy as np\n",
@@ -39,19 +41,19 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
- "data_adj = pd.read_csv(\"final_ratios.csv\",index_col=0)\n",
- "weight_meanv = pd.read_excel('mean_weighted.xlsx',index_col=0)\n",
- "weight_minv = pd.read_excel('minimum_weighted.xlsx',index_col=0)\n",
- "weight_equal=pd.read_excel('equally_weighted.xlsx',index_col=0)"
+ "data_adj = pd.read_csv(\"final_ratios_oct.csv\")\n",
+ "weight_meanv = pd.read_excel('mean_weighted_rf.xlsx')\n",
+ "weight_minv = pd.read_excel('minimum_weighted_rf.xlsx')\n",
+ "weight_equal=pd.read_excel('equally_weighted_rf.xlsx')"
]
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -75,6 +77,7 @@
" \n",
" \n",
" | \n",
+ " Unnamed: 0 | \n",
" date | \n",
" gvkey | \n",
" tic | \n",
@@ -82,7 +85,6 @@
" adj_close_q | \n",
" y_return | \n",
" EPS | \n",
- " BPS | \n",
" DPS | \n",
" cur_ratio | \n",
" quick_ratio | \n",
@@ -98,148 +100,295 @@
"
\n",
" \n",
" 0 | \n",
- " 1996-06-01 | \n",
- " 1045 | \n",
- " AAL | \n",
- " 20.0 | \n",
- " 44.7500 | \n",
- " 0.016621 | \n",
- " 2.02 | \n",
- " 49.584416 | \n",
- " 0.0 | \n",
- " 0.717784 | \n",
- " 0.475086 | \n",
- " 0.179124 | \n",
+ " 66261 | \n",
+ " 1998-03-01 | \n",
+ " 126554 | \n",
+ " A | \n",
+ " 35.0 | \n",
+ " 0.00 | \n",
+ " 0.000000 | \n",
+ " 0.42 | \n",
+ " 0.000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
" 0.000000 | \n",
- " 0.800010 | \n",
- " 4.000257 | \n",
- " 44.306931 | \n",
- " 1.599698 | \n",
- " 1.768866 | \n",
"
\n",
" \n",
" 1 | \n",
- " 1996-09-01 | \n",
- " 1045 | \n",
- " AAL | \n",
- " 20.0 | \n",
- " 45.5000 | \n",
- " -0.133531 | \n",
- " 5.44 | \n",
- " 55.186813 | \n",
- " 0.0 | \n",
- " 0.738051 | \n",
- " 0.497600 | \n",
- " 0.185348 | \n",
+ " 66262 | \n",
+ " 1998-06-01 | \n",
+ " 126554 | \n",
+ " A | \n",
+ " 35.0 | \n",
+ " 0.00 | \n",
+ " 0.000000 | \n",
+ " 0.67 | \n",
+ " 0.000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
" 0.000000 | \n",
- " 0.744583 | \n",
- " 2.915173 | \n",
- " 27.164179 | \n",
- " 1.820000 | \n",
- " 1.648945 | \n",
"
\n",
" \n",
" 2 | \n",
- " 1996-12-01 | \n",
- " 1045 | \n",
- " AAL | \n",
- " 20.0 | \n",
- " 39.8125 | \n",
- " 0.101428 | \n",
- " 8.53 | \n",
- " 58.164835 | \n",
- " 0.0 | \n",
- " 0.789316 | \n",
- " 0.557623 | \n",
- " 0.269708 | \n",
+ " 66263 | \n",
+ " 1998-09-01 | \n",
+ " 126554 | \n",
+ " A | \n",
+ " 35.0 | \n",
+ " 0.00 | \n",
+ " 0.000000 | \n",
+ " 0.81 | \n",
+ " 0.000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
" 0.000000 | \n",
- " 0.737307 | \n",
- " 2.806726 | \n",
- " 26.021242 | \n",
- " 1.588311 | \n",
- " 1.368954 | \n",
"
\n",
" \n",
" 3 | \n",
- " 1997-03-01 | \n",
- " 1045 | \n",
- " AAL | \n",
- " 20.0 | \n",
- " 44.0625 | \n",
- " -0.065958 | \n",
- " 11.63 | \n",
- " 62.285714 | \n",
- " 0.0 | \n",
- " 0.803090 | \n",
- " 0.573662 | \n",
- " 0.325368 | \n",
- " 9.710564 | \n",
- " 0.723472 | \n",
- " 2.616267 | \n",
- " 21.759259 | \n",
- " 1.850767 | \n",
- " 1.414851 | \n",
+ " 66264 | \n",
+ " 1998-12-01 | \n",
+ " 126554 | \n",
+ " A | \n",
+ " 35.0 | \n",
+ " 0.00 | \n",
+ " 0.000000 | \n",
+ " 0.68 | \n",
+ " 0.000 | \n",
+ " 1.923077 | \n",
+ " 0.759850 | \n",
+ " 0.000000 | \n",
+ " 4.909465 | \n",
+ " 0.394024 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
"
\n",
" \n",
" 4 | \n",
- " 1997-06-01 | \n",
- " 1045 | \n",
- " AAL | \n",
- " 20.0 | \n",
- " 41.2500 | \n",
- " 0.114410 | \n",
- " 1.67 | \n",
- " 64.000000 | \n",
- " 0.0 | \n",
- " 0.851293 | \n",
- " 0.611247 | \n",
- " 0.329496 | \n",
- " 9.005358 | \n",
- " 0.713428 | \n",
- " 2.489526 | \n",
- " 49.401198 | \n",
- " 1.696227 | \n",
- " 1.289062 | \n",
+ " 66265 | \n",
+ " 1999-03-01 | \n",
+ " 126554 | \n",
+ " A | \n",
+ " 35.0 | \n",
+ " 0.00 | \n",
+ " 0.000000 | \n",
+ " 0.19 | \n",
+ " 0.000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 77580 | \n",
+ " 41126 | \n",
+ " 2021-09-01 | \n",
+ " 13721 | \n",
+ " ZTS | \n",
+ " 35.0 | \n",
+ " 186.36 | \n",
+ " 0.040899 | \n",
+ " 2.25 | \n",
+ " 0.250 | \n",
+ " 3.370636 | \n",
+ " 2.308943 | \n",
+ " 1.749402 | \n",
+ " 4.676923 | \n",
+ " 0.690667 | \n",
+ " 2.232767 | \n",
+ " 172.555556 | \n",
+ " 45.319980 | \n",
+ " 20.306517 | \n",
+ "
\n",
+ " \n",
+ " 77581 | \n",
+ " 41127 | \n",
+ " 2021-12-01 | \n",
+ " 13721 | \n",
+ " ZTS | \n",
+ " 35.0 | \n",
+ " 194.14 | \n",
+ " 0.228712 | \n",
+ " 3.42 | \n",
+ " 0.250 | \n",
+ " 4.542488 | \n",
+ " 3.008838 | \n",
+ " 2.225697 | \n",
+ " 4.889757 | \n",
+ " 0.658446 | \n",
+ " 1.927793 | \n",
+ " 167.362069 | \n",
+ " 46.111805 | \n",
+ " 19.632734 | \n",
+ "
\n",
+ " \n",
+ " 77582 | \n",
+ " 41128 | \n",
+ " 2022-03-01 | \n",
+ " 13721 | \n",
+ " ZTS | \n",
+ " 35.0 | \n",
+ " 244.03 | \n",
+ " -0.257716 | \n",
+ " 4.29 | \n",
+ " 0.250 | \n",
+ " 3.856427 | \n",
+ " 2.569839 | \n",
+ " 1.939343 | \n",
+ " 5.133274 | \n",
+ " 0.673094 | \n",
+ " 2.058979 | \n",
+ " 277.306818 | \n",
+ " 58.539205 | \n",
+ " 25.379013 | \n",
+ "
\n",
+ " \n",
+ " 77583 | \n",
+ " 41129 | \n",
+ " 2022-06-01 | \n",
+ " 13721 | \n",
+ " ZTS | \n",
+ " 35.0 | \n",
+ " 188.59 | \n",
+ " -0.092721 | \n",
+ " 1.26 | \n",
+ " 0.325 | \n",
+ " 2.253461 | \n",
+ " 1.436058 | \n",
+ " 1.033289 | \n",
+ " 4.838789 | \n",
+ " 0.663925 | \n",
+ " 1.975526 | \n",
+ " 149.674603 | \n",
+ " 44.723354 | \n",
+ " 19.077996 | \n",
+ "
\n",
+ " \n",
+ " 77584 | \n",
+ " 41130 | \n",
+ " 2022-09-01 | \n",
+ " 13721 | \n",
+ " ZTS | \n",
+ " 35.0 | \n",
+ " 171.89 | \n",
+ " 0.000000 | \n",
+ " 2.39 | \n",
+ " 0.325 | \n",
+ " 2.179941 | \n",
+ " 1.292363 | \n",
+ " 0.869223 | \n",
+ " 4.608830 | \n",
+ " 0.667393 | \n",
+ " 2.006550 | \n",
+ " 152.115044 | \n",
+ " 39.235211 | \n",
+ " 17.587312 | \n",
"
\n",
" \n",
"\n",
+ "77585 rows × 18 columns
\n",
""
],
"text/plain": [
- " date gvkey tic gsector adj_close_q y_return EPS BPS \\\n",
- "0 1996-06-01 1045 AAL 20.0 44.7500 0.016621 2.02 49.584416 \n",
- "1 1996-09-01 1045 AAL 20.0 45.5000 -0.133531 5.44 55.186813 \n",
- "2 1996-12-01 1045 AAL 20.0 39.8125 0.101428 8.53 58.164835 \n",
- "3 1997-03-01 1045 AAL 20.0 44.0625 -0.065958 11.63 62.285714 \n",
- "4 1997-06-01 1045 AAL 20.0 41.2500 0.114410 1.67 64.000000 \n",
+ " Unnamed: 0 date gvkey tic gsector adj_close_q y_return \\\n",
+ "0 66261 1998-03-01 126554 A 35.0 0.00 0.000000 \n",
+ "1 66262 1998-06-01 126554 A 35.0 0.00 0.000000 \n",
+ "2 66263 1998-09-01 126554 A 35.0 0.00 0.000000 \n",
+ "3 66264 1998-12-01 126554 A 35.0 0.00 0.000000 \n",
+ "4 66265 1999-03-01 126554 A 35.0 0.00 0.000000 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "77580 41126 2021-09-01 13721 ZTS 35.0 186.36 0.040899 \n",
+ "77581 41127 2021-12-01 13721 ZTS 35.0 194.14 0.228712 \n",
+ "77582 41128 2022-03-01 13721 ZTS 35.0 244.03 -0.257716 \n",
+ "77583 41129 2022-06-01 13721 ZTS 35.0 188.59 -0.092721 \n",
+ "77584 41130 2022-09-01 13721 ZTS 35.0 171.89 0.000000 \n",
"\n",
- " DPS cur_ratio quick_ratio cash_ratio acc_rec_turnover debt_ratio \\\n",
- "0 0.0 0.717784 0.475086 0.179124 0.000000 0.800010 \n",
- "1 0.0 0.738051 0.497600 0.185348 0.000000 0.744583 \n",
- "2 0.0 0.789316 0.557623 0.269708 0.000000 0.737307 \n",
- "3 0.0 0.803090 0.573662 0.325368 9.710564 0.723472 \n",
- "4 0.0 0.851293 0.611247 0.329496 9.005358 0.713428 \n",
+ " EPS DPS cur_ratio quick_ratio cash_ratio acc_rec_turnover \\\n",
+ "0 0.42 0.000 0.000000 0.000000 0.000000 0.000000 \n",
+ "1 0.67 0.000 0.000000 0.000000 0.000000 0.000000 \n",
+ "2 0.81 0.000 0.000000 0.000000 0.000000 0.000000 \n",
+ "3 0.68 0.000 1.923077 0.759850 0.000000 4.909465 \n",
+ "4 0.19 0.000 0.000000 0.000000 0.000000 0.000000 \n",
+ "... ... ... ... ... ... ... \n",
+ "77580 2.25 0.250 3.370636 2.308943 1.749402 4.676923 \n",
+ "77581 3.42 0.250 4.542488 3.008838 2.225697 4.889757 \n",
+ "77582 4.29 0.250 3.856427 2.569839 1.939343 5.133274 \n",
+ "77583 1.26 0.325 2.253461 1.436058 1.033289 4.838789 \n",
+ "77584 2.39 0.325 2.179941 1.292363 0.869223 4.608830 \n",
"\n",
- " debt_to_equity pe ps pb \n",
- "0 4.000257 44.306931 1.599698 1.768866 \n",
- "1 2.915173 27.164179 1.820000 1.648945 \n",
- "2 2.806726 26.021242 1.588311 1.368954 \n",
- "3 2.616267 21.759259 1.850767 1.414851 \n",
- "4 2.489526 49.401198 1.696227 1.289062 "
+ " debt_ratio debt_to_equity pe ps pb \n",
+ "0 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
+ "1 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
+ "2 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
+ "3 0.394024 0.000000 0.000000 0.000000 0.000000 \n",
+ "4 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
+ "... ... ... ... ... ... \n",
+ "77580 0.690667 2.232767 172.555556 45.319980 20.306517 \n",
+ "77581 0.658446 1.927793 167.362069 46.111805 19.632734 \n",
+ "77582 0.673094 2.058979 277.306818 58.539205 25.379013 \n",
+ "77583 0.663925 1.975526 149.674603 44.723354 19.077996 \n",
+ "77584 0.667393 2.006550 152.115044 39.235211 17.587312 \n",
+ "\n",
+ "[77585 rows x 18 columns]"
]
},
- "execution_count": 3,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "data_adj.head()"
+ "data_adj"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
@@ -263,6 +412,7 @@
" \n",
" \n",
" | \n",
+ " Unnamed: 0 | \n",
" trade_date | \n",
" gvkey | \n",
" weights | \n",
@@ -272,53 +422,58 @@
"
\n",
" \n",
" 0 | \n",
- " 2018-03-01 | \n",
- " 1230 | \n",
- " 0.00590 | \n",
- " 0.026920 | \n",
+ " 0 | \n",
+ " 2001-03-01 | \n",
+ " 1078 | \n",
+ " 0.00000 | \n",
+ " 0.044809 | \n",
"
\n",
" \n",
" 1 | \n",
- " 2018-03-01 | \n",
- " 1678 | \n",
- " 0.00572 | \n",
- " 0.013570 | \n",
+ " 1 | \n",
+ " 2001-03-01 | \n",
+ " 1230 | \n",
+ " 0.00000 | \n",
+ " 0.019396 | \n",
"
\n",
" \n",
" 2 | \n",
- " 2018-03-01 | \n",
- " 1722 | \n",
- " 0.00572 | \n",
- " 0.035988 | \n",
+ " 2 | \n",
+ " 2001-03-01 | \n",
+ " 1246 | \n",
+ " 0.01237 | \n",
+ " 0.014087 | \n",
"
\n",
" \n",
" 3 | \n",
- " 2018-03-01 | \n",
- " 2574 | \n",
- " 0.00549 | \n",
- " 0.035439 | \n",
+ " 3 | \n",
+ " 2001-03-01 | \n",
+ " 1327 | \n",
+ " 0.01888 | \n",
+ " 0.128518 | \n",
"
\n",
" \n",
" 4 | \n",
- " 2018-03-01 | \n",
- " 2751 | \n",
- " 0.00572 | \n",
- " 0.028170 | \n",
+ " 4 | \n",
+ " 2001-03-01 | \n",
+ " 1602 | \n",
+ " 0.00000 | \n",
+ " 0.045424 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " trade_date gvkey weights predicted_return\n",
- "0 2018-03-01 1230 0.00590 0.026920\n",
- "1 2018-03-01 1678 0.00572 0.013570\n",
- "2 2018-03-01 1722 0.00572 0.035988\n",
- "3 2018-03-01 2574 0.00549 0.035439\n",
- "4 2018-03-01 2751 0.00572 0.028170"
+ " Unnamed: 0 trade_date gvkey weights predicted_return\n",
+ "0 0 2001-03-01 1078 0.00000 0.044809\n",
+ "1 1 2001-03-01 1230 0.00000 0.019396\n",
+ "2 2 2001-03-01 1246 0.01237 0.014087\n",
+ "3 3 2001-03-01 1327 0.01888 0.128518\n",
+ "4 4 2001-03-01 1602 0.00000 0.045424"
]
},
- "execution_count": 4,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -329,7 +484,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
@@ -353,6 +508,7 @@
" \n",
" \n",
" | \n",
+ " Unnamed: 0 | \n",
" trade_date | \n",
" gvkey | \n",
" weights | \n",
@@ -362,53 +518,58 @@
"
\n",
" \n",
" 0 | \n",
- " 2018-03-01 | \n",
- " 1230 | \n",
- " 0.00531 | \n",
- " 0.026920 | \n",
+ " 0 | \n",
+ " 2001-03-01 | \n",
+ " 1078 | \n",
+ " 0.00662 | \n",
+ " 0.044809 | \n",
"
\n",
" \n",
" 1 | \n",
- " 2018-03-01 | \n",
- " 1678 | \n",
- " 0.00531 | \n",
- " 0.013570 | \n",
+ " 1 | \n",
+ " 2001-03-01 | \n",
+ " 1230 | \n",
+ " 0.00631 | \n",
+ " 0.019396 | \n",
"
\n",
" \n",
" 2 | \n",
- " 2018-03-01 | \n",
- " 1722 | \n",
- " 0.00532 | \n",
- " 0.035988 | \n",
+ " 2 | \n",
+ " 2001-03-01 | \n",
+ " 1246 | \n",
+ " 0.00629 | \n",
+ " 0.014087 | \n",
"
\n",
" \n",
" 3 | \n",
- " 2018-03-01 | \n",
- " 2574 | \n",
- " 0.00531 | \n",
- " 0.035439 | \n",
+ " 3 | \n",
+ " 2001-03-01 | \n",
+ " 1327 | \n",
+ " 0.00628 | \n",
+ " 0.128518 | \n",
"
\n",
" \n",
" 4 | \n",
- " 2018-03-01 | \n",
- " 2751 | \n",
- " 0.00531 | \n",
- " 0.028170 | \n",
+ " 4 | \n",
+ " 2001-03-01 | \n",
+ " 1602 | \n",
+ " 0.00628 | \n",
+ " 0.045424 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " trade_date gvkey weights predicted_return\n",
- "0 2018-03-01 1230 0.00531 0.026920\n",
- "1 2018-03-01 1678 0.00531 0.013570\n",
- "2 2018-03-01 1722 0.00532 0.035988\n",
- "3 2018-03-01 2574 0.00531 0.035439\n",
- "4 2018-03-01 2751 0.00531 0.028170"
+ " Unnamed: 0 trade_date gvkey weights predicted_return\n",
+ "0 0 2001-03-01 1078 0.00662 0.044809\n",
+ "1 1 2001-03-01 1230 0.00631 0.019396\n",
+ "2 2 2001-03-01 1246 0.00629 0.014087\n",
+ "3 3 2001-03-01 1327 0.00628 0.128518\n",
+ "4 4 2001-03-01 1602 0.00628 0.045424"
]
},
- "execution_count": 5,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -419,7 +580,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -443,6 +604,7 @@
" \n",
" \n",
" | \n",
+ " Unnamed: 0 | \n",
" trade_date | \n",
" gvkey | \n",
" weights | \n",
@@ -452,53 +614,58 @@
"
\n",
" \n",
" 0 | \n",
- " 2018-03-01 | \n",
- " 1230 | \n",
- " 0.005556 | \n",
- " 0.026920 | \n",
+ " 0 | \n",
+ " 2001-03-01 | \n",
+ " 1078 | \n",
+ " 0.006579 | \n",
+ " 0.044809 | \n",
"
\n",
" \n",
" 1 | \n",
- " 2018-03-01 | \n",
- " 1678 | \n",
- " 0.005556 | \n",
- " 0.013570 | \n",
+ " 1 | \n",
+ " 2001-03-01 | \n",
+ " 1230 | \n",
+ " 0.006579 | \n",
+ " 0.019396 | \n",
"
\n",
" \n",
" 2 | \n",
- " 2018-03-01 | \n",
- " 1722 | \n",
- " 0.005556 | \n",
- " 0.035988 | \n",
+ " 2 | \n",
+ " 2001-03-01 | \n",
+ " 1246 | \n",
+ " 0.006579 | \n",
+ " 0.014087 | \n",
"
\n",
" \n",
" 3 | \n",
- " 2018-03-01 | \n",
- " 2574 | \n",
- " 0.005556 | \n",
- " 0.035439 | \n",
+ " 3 | \n",
+ " 2001-03-01 | \n",
+ " 1327 | \n",
+ " 0.006579 | \n",
+ " 0.128518 | \n",
"
\n",
" \n",
" 4 | \n",
- " 2018-03-01 | \n",
- " 2751 | \n",
- " 0.005556 | \n",
- " 0.028170 | \n",
+ " 4 | \n",
+ " 2001-03-01 | \n",
+ " 1602 | \n",
+ " 0.006579 | \n",
+ " 0.045424 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " trade_date gvkey weights predicted_return\n",
- "0 2018-03-01 1230 0.005556 0.026920\n",
- "1 2018-03-01 1678 0.005556 0.013570\n",
- "2 2018-03-01 1722 0.005556 0.035988\n",
- "3 2018-03-01 2574 0.005556 0.035439\n",
- "4 2018-03-01 2751 0.005556 0.028170"
+ " Unnamed: 0 trade_date gvkey weights predicted_return\n",
+ "0 0 2001-03-01 1078 0.006579 0.044809\n",
+ "1 1 2001-03-01 1230 0.006579 0.019396\n",
+ "2 2 2001-03-01 1246 0.006579 0.014087\n",
+ "3 3 2001-03-01 1327 0.006579 0.128518\n",
+ "4 4 2001-03-01 1602 0.006579 0.045424"
]
},
- "execution_count": 6,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@@ -509,14 +676,14 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "3870 3870 3870\n"
+ "16319 16319 16319\n"
]
}
],
@@ -535,7 +702,9 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [],
"source": []
},
@@ -555,8 +724,10 @@
},
{
"cell_type": "code",
- "execution_count": 10,
- "metadata": {},
+ "execution_count": 21,
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [],
"source": [
"#get unique stock name from table\n",
@@ -568,16 +739,16 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "644"
+ "858"
]
},
- "execution_count": 11,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -588,7 +759,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -611,16 +782,16 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "107"
+ "108"
]
},
- "execution_count": 13,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -631,148 +802,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " trade_date | \n",
- " gvkey | \n",
- " weights | \n",
- " predicted_return | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 2018-03-01 | \n",
- " 1230 | \n",
- " 0.00590 | \n",
- " 0.026920 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2018-03-01 | \n",
- " 1678 | \n",
- " 0.00572 | \n",
- " 0.013570 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 2018-03-01 | \n",
- " 1722 | \n",
- " 0.00572 | \n",
- " 0.035988 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 2018-03-01 | \n",
- " 2574 | \n",
- " 0.00549 | \n",
- " 0.035439 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 2018-03-01 | \n",
- " 2751 | \n",
- " 0.00572 | \n",
- " 0.028170 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 3865 | \n",
- " 2022-09-01 | \n",
- " 187697 | \n",
- " 0.00144 | \n",
- " 0.036295 | \n",
- "
\n",
- " \n",
- " 3866 | \n",
- " 2022-09-01 | \n",
- " 241637 | \n",
- " 0.00023 | \n",
- " 0.017479 | \n",
- "
\n",
- " \n",
- " 3867 | \n",
- " 2022-09-01 | \n",
- " 260774 | \n",
- " 0.00000 | \n",
- " 0.027837 | \n",
- "
\n",
- " \n",
- " 3868 | \n",
- " 2022-09-01 | \n",
- " 287882 | \n",
- " 0.00012 | \n",
- " -0.000764 | \n",
- "
\n",
- " \n",
- " 3869 | \n",
- " 2022-09-01 | \n",
- " 316056 | \n",
- " 0.04979 | \n",
- " 0.034261 | \n",
- "
\n",
- " \n",
- "
\n",
- "
3870 rows × 4 columns
\n",
- "
"
- ],
- "text/plain": [
- " trade_date gvkey weights predicted_return\n",
- "0 2018-03-01 1230 0.00590 0.026920\n",
- "1 2018-03-01 1678 0.00572 0.013570\n",
- "2 2018-03-01 1722 0.00572 0.035988\n",
- "3 2018-03-01 2574 0.00549 0.035439\n",
- "4 2018-03-01 2751 0.00572 0.028170\n",
- "... ... ... ... ...\n",
- "3865 2022-09-01 187697 0.00144 0.036295\n",
- "3866 2022-09-01 241637 0.00023 0.017479\n",
- "3867 2022-09-01 260774 0.00000 0.027837\n",
- "3868 2022-09-01 287882 0.00012 -0.000764\n",
- "3869 2022-09-01 316056 0.04979 0.034261\n",
- "\n",
- "[3870 rows x 4 columns]"
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "weight_meanv"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
+ "execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
@@ -783,44 +813,157 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "19"
+ "['2001-03-01',\n",
+ " '2001-06-01',\n",
+ " '2001-09-01',\n",
+ " '2001-12-01',\n",
+ " '2002-03-01',\n",
+ " '2002-06-01',\n",
+ " '2002-09-01',\n",
+ " '2002-12-01',\n",
+ " '2003-03-01',\n",
+ " '2003-06-01',\n",
+ " '2003-09-01',\n",
+ " '2003-12-01',\n",
+ " '2004-03-01',\n",
+ " '2004-06-01',\n",
+ " '2004-09-01',\n",
+ " '2004-12-01',\n",
+ " '2005-03-01',\n",
+ " '2005-06-01',\n",
+ " '2005-09-01',\n",
+ " '2005-12-01',\n",
+ " '2006-03-01',\n",
+ " '2006-06-01',\n",
+ " '2006-09-01',\n",
+ " '2006-12-01',\n",
+ " '2007-03-01',\n",
+ " '2007-06-01',\n",
+ " '2007-09-01',\n",
+ " '2007-12-01',\n",
+ " '2008-03-01',\n",
+ " '2008-06-01',\n",
+ " '2008-09-01',\n",
+ " '2008-12-01',\n",
+ " '2009-03-01',\n",
+ " '2009-06-01',\n",
+ " '2009-09-01',\n",
+ " '2009-12-01',\n",
+ " '2010-03-01',\n",
+ " '2010-06-01',\n",
+ " '2010-09-01',\n",
+ " '2010-12-01',\n",
+ " '2011-03-01',\n",
+ " '2011-06-01',\n",
+ " '2011-09-01',\n",
+ " '2011-12-01',\n",
+ " '2012-03-01',\n",
+ " '2012-06-01',\n",
+ " '2012-09-01',\n",
+ " '2012-12-01',\n",
+ " '2013-03-01',\n",
+ " '2013-06-01',\n",
+ " '2013-09-01',\n",
+ " '2013-12-01',\n",
+ " '2014-03-01',\n",
+ " '2014-06-01',\n",
+ " '2014-09-01',\n",
+ " '2014-12-01',\n",
+ " '2015-03-01',\n",
+ " '2015-06-01',\n",
+ " '2015-09-01',\n",
+ " '2015-12-01',\n",
+ " '2016-03-01',\n",
+ " '2016-06-01',\n",
+ " '2016-09-01',\n",
+ " '2016-12-01',\n",
+ " '2017-03-01',\n",
+ " '2017-06-01',\n",
+ " '2017-09-01',\n",
+ " '2017-12-01',\n",
+ " '2018-03-01',\n",
+ " '2018-06-01',\n",
+ " '2018-09-01',\n",
+ " '2018-12-01',\n",
+ " '2019-03-01',\n",
+ " '2019-06-01',\n",
+ " '2019-09-01',\n",
+ " '2019-12-01',\n",
+ " '2020-03-01',\n",
+ " '2020-06-01',\n",
+ " '2020-09-01',\n",
+ " '2020-12-01',\n",
+ " '2021-03-01',\n",
+ " '2021-06-01',\n",
+ " '2021-09-01',\n",
+ " '2021-12-01',\n",
+ " '2022-03-01',\n",
+ " '2022-06-01',\n",
+ " '2022-09-01',\n",
+ " '2022-12-01']"
]
},
- "execution_count": 16,
+ "execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "len(tradedate)"
+ "tradedate"
]
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tradedate_full = [x for x in tradedate if x > '2018-01-01']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "19"
+ "['2018-03-01',\n",
+ " '2018-06-01',\n",
+ " '2018-09-01',\n",
+ " '2018-12-01',\n",
+ " '2019-03-01',\n",
+ " '2019-06-01',\n",
+ " '2019-09-01',\n",
+ " '2019-12-01',\n",
+ " '2020-03-01',\n",
+ " '2020-06-01',\n",
+ " '2020-09-01',\n",
+ " '2020-12-01',\n",
+ " '2021-03-01',\n",
+ " '2021-06-01',\n",
+ " '2021-09-01',\n",
+ " '2021-12-01',\n",
+ " '2022-03-01',\n",
+ " '2022-06-01',\n",
+ " '2022-09-01',\n",
+ " '2022-12-01']"
]
},
- "execution_count": 17,
+ "execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "#add the last trade date 20170901\n",
- "tradedate_full = tradedate.copy()\n",
- "len(tradedate_full)"
+ "tradedate_full"
]
},
{
@@ -832,14 +975,16 @@
},
{
"cell_type": "code",
- "execution_count": 18,
- "metadata": {},
+ "execution_count": 29,
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "23.0654399394989\n"
+ "14.190996885299683\n"
]
}
],
@@ -876,8 +1021,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "execution_count": 30,
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [],
"source": [
"balance_daily = pd.read_excel('balance_daily_user8.xlsx', index_col=0)\n"
@@ -885,7 +1032,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 31,
"metadata": {},
"outputs": [
{
@@ -920,7 +1067,6 @@
" 1998-03-01 | \n",
" 1998-06-01 | \n",
" ... | \n",
- " 2020-06-01 | \n",
" 2020-09-01 | \n",
" 2020-12-01 | \n",
" 2021-03-01 | \n",
@@ -930,6 +1076,7 @@
" 2022-03-01 | \n",
" 2022-06-01 | \n",
" 2022-09-01 | \n",
+ " 2022-12-01 | \n",
" \n",
" \n",
" \n",
@@ -946,7 +1093,6 @@
" 64.2500 | \n",
" 71.59350 | \n",
" ... | \n",
- " 12.19 | \n",
" 13.07 | \n",
" 12.29 | \n",
" 15.77 | \n",
@@ -956,6 +1102,7 @@
" 17.96 | \n",
" 18.25 | \n",
" 12.68 | \n",
+ " 12.04 | \n",
" \n",
" \n",
" 1075 | \n",
@@ -970,7 +1117,6 @@
" 42.3750 | \n",
" 44.43700 | \n",
" ... | \n",
- " 75.79 | \n",
" 73.29 | \n",
" 74.55 | \n",
" 79.95 | \n",
@@ -980,6 +1126,7 @@
" 70.59 | \n",
" 78.10 | \n",
" 73.12 | \n",
+ " NaN | \n",
"
\n",
" \n",
" 1078 | \n",
@@ -994,7 +1141,6 @@
" 32.7500 | \n",
" 37.65600 | \n",
" ... | \n",
- " 78.91 | \n",
" 91.43 | \n",
" 108.83 | \n",
" 109.49 | \n",
@@ -1004,6 +1150,7 @@
" 140.74 | \n",
" 118.36 | \n",
" 108.65 | \n",
+ " 96.76 | \n",
"
\n",
" \n",
" 1161 | \n",
@@ -1018,7 +1165,6 @@
" 8.8750 | \n",
" 14.53100 | \n",
" ... | \n",
- " 45.48 | \n",
" 52.61 | \n",
" 81.99 | \n",
" 91.71 | \n",
@@ -1028,6 +1174,7 @@
" 143.90 | \n",
" 109.34 | \n",
" 76.47 | \n",
+ " NaN | \n",
"
\n",
" \n",
" 1177 | \n",
@@ -1090,7 +1237,6 @@
" NaN | \n",
" NaN | \n",
" ... | \n",
- " 1.42 | \n",
" 1.72 | \n",
" 1.51 | \n",
" 2.05 | \n",
@@ -1100,6 +1246,7 @@
" 3.18 | \n",
" NaN | \n",
" NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
" 287882 | \n",
@@ -1114,7 +1261,6 @@
" NaN | \n",
" NaN | \n",
" ... | \n",
- " 9.51 | \n",
" 11.13 | \n",
" 9.84 | \n",
" 12.91 | \n",
@@ -1124,6 +1270,7 @@
" 12.89 | \n",
" 18.60 | \n",
" 10.86 | \n",
+ " NaN | \n",
"
\n",
" \n",
" 294524 | \n",
@@ -1138,7 +1285,6 @@
" NaN | \n",
" NaN | \n",
" ... | \n",
- " 49.63 | \n",
" 65.72 | \n",
" 70.49 | \n",
" 91.66 | \n",
@@ -1148,6 +1294,7 @@
" 92.23 | \n",
" 102.82 | \n",
" 87.46 | \n",
+ " NaN | \n",
"
\n",
" \n",
" 312009 | \n",
@@ -1162,7 +1309,6 @@
" NaN | \n",
" NaN | \n",
" ... | \n",
- " 9.61 | \n",
" 6.98 | \n",
" 6.50 | \n",
" 6.22 | \n",
@@ -1172,6 +1318,7 @@
" 7.07 | \n",
" 8.80 | \n",
" 8.86 | \n",
+ " NaN | \n",
"
\n",
" \n",
" 316056 | \n",
@@ -1186,7 +1333,6 @@
" NaN | \n",
" NaN | \n",
" ... | \n",
- " 92.02 | \n",
" 102.22 | \n",
" 98.91 | \n",
" 116.38 | \n",
@@ -1196,10 +1342,11 @@
" 132.44 | \n",
" 109.78 | \n",
" 97.90 | \n",
+ " NaN | \n",
"
\n",
" \n",
"\n",
- "644 rows × 107 columns
\n",
+ "858 rows × 108 columns
\n",
""
],
"text/plain": [
@@ -1229,36 +1376,36 @@
"312009 NaN NaN NaN NaN NaN ... \n",
"316056 NaN NaN NaN NaN NaN ... \n",
"\n",
- " 2020-06-01 2020-09-01 2020-12-01 2021-03-01 2021-06-01 \\\n",
- "1045 12.19 13.07 12.29 15.77 23.90 \n",
- "1075 75.79 73.29 74.55 79.95 81.35 \n",
- "1078 78.91 91.43 108.83 109.49 119.84 \n",
- "1161 45.48 52.61 81.99 91.71 78.50 \n",
+ " 2020-09-01 2020-12-01 2021-03-01 2021-06-01 2021-09-01 \\\n",
+ "1045 13.07 12.29 15.77 23.90 21.21 \n",
+ "1075 73.29 74.55 79.95 81.35 81.97 \n",
+ "1078 91.43 108.83 109.49 119.84 115.93 \n",
+ "1161 52.61 81.99 91.71 78.50 93.93 \n",
"1177 NaN NaN NaN NaN NaN \n",
"... ... ... ... ... ... \n",
- "270281 1.42 1.72 1.51 2.05 1.73 \n",
- "287882 9.51 11.13 9.84 12.91 12.82 \n",
- "294524 49.63 65.72 70.49 91.66 104.05 \n",
- "312009 9.61 6.98 6.50 6.22 7.15 \n",
- "316056 92.02 102.22 98.91 116.38 125.62 \n",
+ "270281 1.72 1.51 2.05 1.73 2.03 \n",
+ "287882 11.13 9.84 12.91 12.82 14.62 \n",
+ "294524 65.72 70.49 91.66 104.05 102.87 \n",
+ "312009 6.98 6.50 6.22 7.15 9.00 \n",
+ "316056 102.22 98.91 116.38 125.62 139.30 \n",
"\n",
- " 2021-09-01 2021-12-01 2022-03-01 2022-06-01 2022-09-01 \n",
- "1045 21.21 20.52 17.96 18.25 12.68 \n",
- "1075 81.97 72.36 70.59 78.10 73.12 \n",
- "1078 115.93 118.13 140.74 118.36 108.65 \n",
- "1161 93.93 102.90 143.90 109.34 76.47 \n",
+ " 2021-12-01 2022-03-01 2022-06-01 2022-09-01 2022-12-01 \n",
+ "1045 20.52 17.96 18.25 12.68 12.04 \n",
+ "1075 72.36 70.59 78.10 73.12 NaN \n",
+ "1078 118.13 140.74 118.36 108.65 96.76 \n",
+ "1161 102.90 143.90 109.34 76.47 NaN \n",
"1177 NaN NaN NaN NaN NaN \n",
"... ... ... ... ... ... \n",
- "270281 2.03 4.06 3.18 NaN NaN \n",
- "287882 14.62 14.35 12.89 18.60 10.86 \n",
- "294524 102.87 93.85 92.23 102.82 87.46 \n",
- "312009 9.00 9.37 7.07 8.80 8.86 \n",
- "316056 139.30 132.18 132.44 109.78 97.90 \n",
+ "270281 4.06 3.18 NaN NaN NaN \n",
+ "287882 14.35 12.89 18.60 10.86 NaN \n",
+ "294524 93.85 92.23 102.82 87.46 NaN \n",
+ "312009 9.37 7.07 8.80 8.86 NaN \n",
+ "316056 132.18 132.44 109.78 97.90 NaN \n",
"\n",
- "[644 rows x 107 columns]"
+ "[858 rows x 108 columns]"
]
},
- "execution_count": 19,
+ "execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
@@ -1289,8 +1436,10 @@
},
{
"cell_type": "code",
- "execution_count": 20,
- "metadata": {},
+ "execution_count": 32,
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [],
"source": [
"# set initial capital to 1 million and transaction percet to 0.1%\n",
@@ -1301,16 +1450,16 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "(3870, 4)"
+ "(16319, 5)"
]
},
- "execution_count": 21,
+ "execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
@@ -1321,8 +1470,10 @@
},
{
"cell_type": "code",
- "execution_count": 22,
- "metadata": {},
+ "execution_count": 38,
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [],
"source": [
"#construct an empty matrix\n",
@@ -1365,6 +1516,7 @@
" balance_price_full = []\n",
" balance_price_full = balance_daily[tradedate_full]\n",
" balance_price = balance_daily[tradedate]\n",
+ " balance_price.replace([0, np.inf, -np.inf], np.nan, inplace=True)\n",
" for j in range(len(tradedate) - 1):\n",
" i = tradedate[j]\n",
" #current capital\n",
@@ -1436,9 +1588,21 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 39,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_3642/1845856610.py:41: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " balance_price.replace([0, np.inf, -np.inf], np.nan, inplace=True)\n"
+ ]
+ }
+ ],
"source": [
"#calculate mean variance allocation portolio\n",
"(balance_share_meanv,\n",
@@ -1447,21 +1611,21 @@
" portfolio_meanv,\n",
" portfolio_cost_meanv,\n",
" portfolio_return_meanv,\n",
- " portfolio_cumsum_meanv)=cal_portfolio(stocks_name,tradedate, weight_meanv, capital, transaction_percent)\n"
+ " portfolio_cumsum_meanv)=cal_portfolio(stocks_name,tradedate_full, weight_meanv, capital, transaction_percent)\n"
]
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "0.2618634791131256"
+ "-0.6256100382905669"
]
},
- "execution_count": 24,
+ "execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
@@ -1472,9 +1636,21 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 41,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_3642/1845856610.py:41: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " balance_price.replace([0, np.inf, -np.inf], np.nan, inplace=True)\n"
+ ]
+ }
+ ],
"source": [
"#calculate minimum variance allocation portolio\n",
"(balance_share_minv,\n",
@@ -1483,14 +1659,26 @@
" portfolio_minv,\n",
" portfolio_cost_minv,\n",
" portfolio_return_minv,\n",
- " portfolio_cumsum_minv)=cal_portfolio(stocks_name,tradedate, weight_minv, capital, transaction_percent)"
+ " portfolio_cumsum_minv)=cal_portfolio(stocks_name,tradedate_full, weight_minv, capital, transaction_percent)"
]
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 42,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_3642/1845856610.py:41: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " balance_price.replace([0, np.inf, -np.inf], np.nan, inplace=True)\n"
+ ]
+ }
+ ],
"source": [
"#calculate equally weighted allocation portolio\n",
"(balance_share_equal,\n",
@@ -1499,21 +1687,21 @@
" portfolio_equal,\n",
" portfolio_cost_equal,\n",
" portfolio_return_equal,\n",
- " portfolio_cumsum_equal)=cal_portfolio(stocks_name,tradedate, weight_equal, capital, transaction_percent)"
+ " portfolio_cumsum_equal)=cal_portfolio(stocks_name,tradedate_full, weight_equal, capital, transaction_percent)"
]
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "0.3362423821397683"
+ "-0.6039422883904094"
]
},
- "execution_count": 27,
+ "execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
@@ -1522,44 +1710,6 @@
"sum(portfolio_return_equal)"
]
},
- {
- "cell_type": "code",
- "execution_count": 28,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['2018-03-01',\n",
- " '2018-06-01',\n",
- " '2018-09-01',\n",
- " '2018-12-01',\n",
- " '2019-03-01',\n",
- " '2019-06-01',\n",
- " '2019-09-01',\n",
- " '2019-12-01',\n",
- " '2020-03-01',\n",
- " '2020-06-01',\n",
- " '2020-09-01',\n",
- " '2020-12-01',\n",
- " '2021-03-01',\n",
- " '2021-06-01',\n",
- " '2021-09-01',\n",
- " '2021-12-01',\n",
- " '2022-03-01',\n",
- " '2022-06-01',\n",
- " '2022-09-01']"
- ]
- },
- "execution_count": 28,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "tradedate"
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -1569,7 +1719,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
@@ -1580,7 +1730,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
@@ -1589,7 +1739,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
@@ -1598,7 +1748,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
@@ -1607,7 +1757,53 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 2677.669922\n",
+ "1 2734.620117\n",
+ "2 2901.520020\n",
+ "3 2760.169922\n",
+ "4 2803.689941\n",
+ "5 2752.060059\n",
+ "6 2926.459961\n",
+ "7 3140.979980\n",
+ "8 2954.219971\n",
+ "9 3055.729980\n",
+ "10 3526.649902\n",
+ "11 3662.449951\n",
+ "12 3901.820068\n",
+ "13 4202.040039\n",
+ "14 4524.089844\n",
+ "15 4513.040039\n",
+ "16 4306.259766\n",
+ "Name: close, dtype: float64"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "spx"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tradedate = tradedate[:-1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
@@ -1642,16 +1838,16 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "0.3589495840431508"
+ "0.8455213218848242"
]
},
- "execution_count": 34,
+ "execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
@@ -1669,7 +1865,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -1679,7 +1875,7 @@
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtime_ind\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[1;32m/home/wenbiaolin/ML4StockRec_using_gvkey/fundamental_back_testing.ipynb Cell 42\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m time_ind\u001b[39m.\u001b[39mhead()\n",
"\u001b[0;31mNameError\u001b[0m: name 'time_ind' is not defined"
]
}
@@ -1697,100 +1893,18 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
- "time_ind = tradedate"
+ "time_ind = tradedate_full[:-1]"
]
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 66,
"metadata": {},
"outputs": [
- {
- "data": {
- "text/html": [
- " \n",
- " "
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
{
"data": {
"application/vnd.plotly.v1+json": {
@@ -1826,25 +1940,26 @@
"2022-09-01"
],
"y": [
- 998169.8682883774,
- 978517.3697772473,
- 1059184.1072124462,
- 1084144.3282912015,
- 910971.1389600794,
- 1007681.1598663231,
- 1010369.5383078569,
- 987943.0709733691,
- 1117031.829369674,
- 758732.1455078851,
- 970815.4700971642,
- 993859.458400753,
- 1220237.3470690495,
- 1392506.8056284683,
- 1467397.265600469,
- 1446242.5045718001,
- 1457974.6215145695,
- 1362297.8389132002,
- 1068964.4232
+ 998097.6373722198,
+ 1023339.4722018229,
+ 1004583.3065904576,
+ 1031019.6217769366,
+ 809909.7023460879,
+ 949091.4082848827,
+ 935672.7535135892,
+ 950925.9353386193,
+ 1082048.282080676,
+ 760624.137775737,
+ 958583.6574496945,
+ 1028353.2224276176,
+ 1252929.3533126737,
+ 1413438.401572646,
+ 1504081.908906326,
+ 1459661.9218582301,
+ 1514824.4380524335,
+ 1510132.2849140002,
+ 1162363.9356803,
+ 25787.75
]
},
{
@@ -1873,25 +1988,26 @@
"2022-09-01"
],
"y": [
- 998967.5243363113,
- 981656.782363495,
- 1002093.5958363946,
- 1025279.9978075742,
- 852960.5426584905,
- 965310.80165797,
- 970721.8329451823,
- 949365.083501407,
- 1019321.6109255173,
- 703796.3635158397,
- 896874.2848335523,
- 975292.9920188024,
- 1196701.1319587466,
- 1366093.765435214,
- 1439036.7272038234,
- 1412478.6554720271,
- 1478830.9179238824,
- 1424229.4466146997,
- 1171977.7292
+ 998949.8181730722,
+ 980399.1316273996,
+ 998854.0858749872,
+ 1024825.8053336296,
+ 854417.3357285586,
+ 980499.5171671169,
+ 973611.4804992174,
+ 958861.2789376328,
+ 1038140.6236854413,
+ 724188.7089781559,
+ 916285.3825595068,
+ 977104.7060194196,
+ 1187790.756345991,
+ 1338886.6557853844,
+ 1424632.4071524558,
+ 1381438.4756382285,
+ 1423259.0638506,
+ 1387696.934248,
+ 1136045.7860163334,
+ 111673.29999999999
]
},
{
@@ -1920,25 +2036,26 @@
"2022-09-01"
],
"y": [
- 998918.9080804393,
- 987579.5852179282,
- 1009580.0109353173,
- 1052959.6538395123,
- 875778.4242493137,
- 985919.5572947458,
- 987431.6535367251,
- 969488.7031053505,
- 1030064.5954796389,
- 717598.9786928936,
- 913159.3419426266,
- 985032.4748272595,
- 1209366.2029215146,
- 1380503.506009758,
- 1456589.403747596,
- 1433425.136001657,
- 1500565.2090683258,
- 1464990.8531060999,
- 1207466.6736333333
+ 998895.8909723532,
+ 992374.6853119333,
+ 1008796.3304465894,
+ 1033877.2725289491,
+ 864068.4192988311,
+ 995085.7233387289,
+ 976002.05972606,
+ 947259.1794897235,
+ 1032189.032513042,
+ 715572.3709715783,
+ 903789.2488963603,
+ 954037.5797818311,
+ 1161121.0697542438,
+ 1309443.9512316308,
+ 1392765.2173538767,
+ 1351914.1622707942,
+ 1391383.3055489,
+ 1364547.2558339997,
+ 1131390.3898183997,
+ 104749.20999999999
]
},
{
@@ -1968,24 +2085,92 @@
],
"y": [
1000000,
- 1020825.6331307443,
- 1079545.0964922423,
- 1024260.889674335,
- 1040159.8852320837,
- 1020646.0185876773,
- 1081470.5534677668,
- 1155331.9233889321,
- 1082294.0983676286,
- 1118247.4336416188,
- 1267569.0302473544,
- 1314569.2455391786,
- 1395215.8640000108,
- 1494898.7946529207,
- 1601313.9812369465,
- 1597393.2966024475,
- 1520688.8150717698,
- 1444666.1334546707,
- 1391251.8476192872
+ 1015420.4215210223,
+ 901577.66175118,
+ 906222.2329908399,
+ 900080.8861314631,
+ 845560.1663892802,
+ 706118.0063925646,
+ 721381.9911742795,
+ 639771.4711544869,
+ 721064.9863496653,
+ 752840.1612591005,
+ 796535.1595781911,
+ 855691.12879557,
+ 829154.9188806105,
+ 817691.3192282643,
+ 876346.3705444649,
+ 890131.5083541947,
+ 884067.531932604,
+ 898085.6429648924,
+ 928678.2863560262,
+ 947787.7721022192,
+ 943711.196275488,
+ 961923.0628269009,
+ 1020945.1660701773,
+ 1025645.5162149781,
+ 1114548.451961276,
+ 1067402.9019714282,
+ 1072555.6604772124,
+ 951236.9149655188,
+ 998616.0367663883,
+ 907109.4708895985,
+ 388523.13879740227,
+ 345648.1248933483,
+ 421818.49492525955,
+ 445135.91674057767,
+ 489623.01719070674,
+ 492629.09030244197,
+ 471924.78206550394,
+ 476109.8398905623,
+ 525762.8900103732,
+ 566114.8640599039,
+ 569654.8713096414,
+ 517566.6533542639,
+ 534267.4134848869,
+ 584622.9066905852,
+ 540686.108427394,
+ 590096.5576499363,
+ 594096.7443247958,
+ 634018.8170999325,
+ 677773.4946512762,
+ 678699.0607562413,
+ 743659.6056120741,
+ 765112.0901678336,
+ 790616.223033034,
+ 822108.7647782513,
+ 842154.6111973216,
+ 862587.2383262035,
+ 865540.4988961497,
+ 776049.0756745328,
+ 845724.9010333614,
+ 792596.479097556,
+ 838272.1945614931,
+ 865893.3362604625,
+ 873884.070175674,
+ 948610.3881477184,
+ 961921.8726763312,
+ 979979.1449241162,
+ 1041424.8497400946,
+ 1055212.3818667624,
+ 1077187.8478065385,
+ 1139149.3526021626,
+ 1080812.773046224,
+ 1097589.590018005,
+ 1076998.3163167306,
+ 1141181.1186434878,
+ 1219120.5507259427,
+ 1142050.1334188452,
+ 1179988.5379693666,
+ 1337554.5355878528,
+ 1387149.7447141893,
+ 1472249.0550697441,
+ 1577435.7177554606,
+ 1689726.3402575864,
+ 1685589.1852858684,
+ 1604649.6666300267,
+ 1524429.591684949,
+ 1468066.175902882
]
}
],
@@ -2004,6 +2189,11 @@
"line": {
"color": "#E5ECF6",
"width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
}
},
"type": "bar"
@@ -2015,6 +2205,11 @@
"line": {
"color": "#E5ECF6",
"width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
}
},
"type": "barpolar"
@@ -2213,9 +2408,10 @@
"histogram": [
{
"marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
}
},
"type": "histogram"
@@ -2351,11 +2547,10 @@
],
"scatter": [
{
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
},
"type": "scatter"
}
@@ -2533,6 +2728,7 @@
"arrowhead": 0,
"arrowwidth": 1
},
+ "autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
@@ -2811,23 +3007,9 @@
}
},
"text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " "
+ " }) }; }); "
]
},
"metadata": {},
@@ -2886,7 +3064,7 @@
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 67,
"metadata": {},
"outputs": [
{
@@ -2925,24 +3103,25 @@
],
"y": [
0,
- -0.019688531116281282,
- 0.08243771641331424,
- 0.023565516994439692,
- -0.1597325972309175,
- 0.1061614542658763,
- 0.002667885982794803,
- -0.022196301931318264,
- 0.1306641669839543,
- -0.32076049620177116,
- 0.2795233151052448,
- 0.0237367337185948,
- 0.22777656011099146,
- 0.14117700869687494,
- 0.05378103695385604,
- -0.014416519319335226,
- 0.008112136730653618,
- -0.06562307820007086,
- -0.21532252884377515
+ 0.02528994547673663,
+ -0.01832839064734743,
+ 0.026315702254901603,
+ -0.21445752802431756,
+ 0.17184842401026096,
+ -0.014138421920331748,
+ 0.016301833913355053,
+ 0.13788912665986414,
+ -0.29705157304706487,
+ 0.2602593184234761,
+ 0.07278401257491145,
+ 0.2183842341203567,
+ 0.1281070220245025,
+ 0.06412978961999784,
+ -0.029532957470644147,
+ 0.0377912963050913,
+ -0.0030974897292163554,
+ -0.23028999029280742,
+ -0.9778143925422917
]
},
{
@@ -2972,24 +3151,25 @@
],
"y": [
0,
- -0.011351594980118283,
- 0.02227711674754221,
- 0.04296800890897818,
- -0.1682697232929343,
- 0.12576369775247803,
- 0.0015336912943773044,
- -0.01817133405345837,
- 0.06248230864398826,
- -0.30334565245517336,
- 0.2725204035350582,
- 0.07870820522049553,
- 0.2277424692354386,
- 0.14150991045956143,
- 0.05511459942449454,
- -0.015903086817974053,
- 0.04683891148577655,
- -0.023707304252584542,
- -0.17578552038516773
+ -0.006528413740967504,
+ 0.01654782752695298,
+ 0.024862245554815286,
+ -0.16424469106932924,
+ 0.1516283908931828,
+ -0.01917790916408591,
+ -0.029449610223572602,
+ 0.08965851676314089,
+ -0.30674290422424455,
+ 0.26302982837253475,
+ 0.05559739833908219,
+ 0.21706009738082693,
+ 0.12774109896117902,
+ 0.06363102906686155,
+ -0.02933089839843624,
+ 0.029195006886983142,
+ -0.019287316160742286,
+ -0.17086756432857764,
+ -0.9074155031343218
]
},
{
@@ -3019,24 +3199,25 @@
],
"y": [
0,
- -0.017328633365050694,
- 0.020818695332287855,
- 0.023137960433553293,
- -0.16807062999138395,
- 0.1317180026280095,
- 0.005605480926887663,
- -0.02200089533268119,
- 0.0736876978518103,
- -0.30954435187849,
- 0.27433776490856676,
- 0.08743556205293987,
- 0.227017052056984,
- 0.14154965592721336,
- 0.05339528194491913,
- -0.01845545094835832,
- 0.046975762922011305,
- -0.036922051498515615,
- -0.177114521830935
+ -0.01857018862028424,
+ 0.0188239194142834,
+ 0.026001514961909,
+ -0.16628042416398262,
+ 0.147565102165265,
+ -0.007025028107918447,
+ -0.015149987296802895,
+ 0.08268072398923629,
+ -0.30241752181197135,
+ 0.26525775837129945,
+ 0.06637596170095289,
+ 0.21562279766809775,
+ 0.1272075057261859,
+ 0.06404257671593072,
+ -0.030319352063991716,
+ 0.030273218062100333,
+ -0.024986406555098595,
+ -0.181344457871875,
+ -0.9017000006737453
]
},
{
@@ -3066,6 +3247,74 @@
],
"y": [
0,
+ 0.015420421521022359,
+ -0.11211391592786224,
+ 0.005151604167563888,
+ -0.006776866242961527,
+ -0.06057313357304172,
+ -0.16491098509543442,
+ 0.021616761849334407,
+ -0.1131307975777791,
+ 0.1270664899272264,
+ 0.04406700576364758,
+ 0.0580402063646712,
+ 0.07426661397935691,
+ -0.031011435110132206,
+ -0.013825642701152312,
+ 0.0717325107126723,
+ 0.015730238947832015,
+ -0.006812450030897837,
+ 0.01585638033968322,
+ 0.03406428287856472,
+ 0.020577078227138664,
+ -0.004301148365407913,
+ 0.019298135513586202,
+ 0.06135844489456598,
+ 0.004603920270168108,
+ 0.0866799828408391,
+ -0.04230013500703855,
+ 0.004827379142652915,
+ -0.11311184116795885,
+ 0.04980790910809731,
+ -0.09163338310998546,
+ -0.5716910127546356,
+ -0.11035382355029158,
+ 0.22036968971092805,
+ 0.055278329650883605,
+ 0.09994048733671584,
+ 0.006139566577125166,
+ -0.042028188437323,
+ 0.008868061148942749,
+ 0.10428906516870991,
+ 0.07674937660346187,
+ 0.006253160753191119,
+ -0.0914382033381479,
+ 0.032267844194343066,
+ 0.09425147769586494,
+ -0.0751540826751167,
+ 0.09138472110233163,
+ 0.006778868005585903,
+ 0.06719793225008994,
+ 0.06901163872624808,
+ 0.0013655979649091856,
+ 0.09571332658608728,
+ 0.02884718276193436,
+ 0.03333385159239327,
+ 0.039832906064591965,
+ 0.02438344812499034,
+ 0.024262322924091233,
+ 0.003423723930435997,
+ -0.1033936867607559,
+ 0.08978275671324937,
+ -0.06281998067088922,
+ 0.05762795655608152,
+ 0.032950086950478095,
+ 0.009228312057142194,
+ 0.08551056200969813,
+ 0.014032615175767864,
+ 0.01877207781703161,
+ 0.06270103311304294,
+ 0.013239104223515227,
0.020825633130744352,
0.057521540854546034,
-0.051210650669010274,
@@ -3102,6 +3351,11 @@
"line": {
"color": "#E5ECF6",
"width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
}
},
"type": "bar"
@@ -3113,6 +3367,11 @@
"line": {
"color": "#E5ECF6",
"width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
}
},
"type": "barpolar"
@@ -3311,9 +3570,10 @@
"histogram": [
{
"marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
}
},
"type": "histogram"
@@ -3449,11 +3709,10 @@
],
"scatter": [
{
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
},
"type": "scatter"
}
@@ -3631,6 +3890,7 @@
"arrowhead": 0,
"arrowwidth": 1
},
+ "autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
@@ -3909,23 +4169,9 @@
}
},
"text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " "
+ " }) }; }); "
]
},
"metadata": {},
@@ -3981,7 +4223,7 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 68,
"metadata": {},
"outputs": [
{
@@ -4020,24 +4262,25 @@
],
"y": [
0,
- -0.019688531116281282,
- 0.06274918529703295,
- 0.08631470229147264,
- -0.07341789493944487,
- 0.03274355932643143,
- 0.035411445309226235,
- 0.013215143377907971,
- 0.14387931036186227,
- -0.1768811858399089,
- 0.10264212926533592,
- 0.12637886298393072,
- 0.3541554230949222,
- 0.49533243179179715,
- 0.5491134687456531,
- 0.534696949426318,
- 0.5428090861569715,
- 0.4771860079569007,
- 0.2618634791131256
+ 0.02528994547673663,
+ 0.006961554829389201,
+ 0.0332772570842908,
+ -0.18118027094002676,
+ -0.009331846929765797,
+ -0.023470268850097543,
+ -0.0071684349367424904,
+ 0.13072069172312165,
+ -0.16633088132394322,
+ 0.09392843709953289,
+ 0.16671244967444432,
+ 0.385096683794801,
+ 0.5132037058193035,
+ 0.5773334954393013,
+ 0.5478005379686572,
+ 0.5855918342737485,
+ 0.5824943445445322,
+ 0.3522043542517248,
+ -0.6256100382905669
]
},
{
@@ -4067,24 +4310,25 @@
],
"y": [
0,
- -0.011351594980118283,
- 0.010925521767423925,
- 0.0538935306764021,
- -0.1143761926165322,
- 0.01138750513594583,
- 0.012921196430323134,
- -0.005250137623135236,
- 0.057232171020853026,
- -0.24611348143432032,
- 0.026406922100737862,
- 0.10511512732123339,
- 0.33285759655667196,
- 0.4743675070162334,
- 0.529482106440728,
- 0.513579019622754,
- 0.5604179311085306,
- 0.5367106268559461,
- 0.36092510647077836
+ -0.006528413740967504,
+ 0.010019413785985476,
+ 0.03488165934080076,
+ -0.12936303172852848,
+ 0.02226535916465433,
+ 0.003087450000568419,
+ -0.026362160223004184,
+ 0.06329635654013671,
+ -0.24344654768410784,
+ 0.019583280688426907,
+ 0.0751806790275091,
+ 0.292240776408336,
+ 0.41998187536951503,
+ 0.4836129044363766,
+ 0.4542820060379404,
+ 0.4834770129249235,
+ 0.4641896967641812,
+ 0.29332213243560357,
+ -0.6140933706987183
]
},
{
@@ -4114,24 +4358,25 @@
],
"y": [
0,
- -0.017328633365050694,
- 0.003490061967237161,
- 0.026628022400790454,
- -0.1414426075905935,
- -0.009724604962583983,
- -0.004119124035696319,
- -0.02612001936837751,
- 0.04756767848343279,
- -0.2619766733950572,
- 0.012361091513509559,
- 0.09979665356644943,
- 0.3268137056234334,
- 0.4683633615506468,
- 0.5217586434955659,
- 0.5033031925472076,
- 0.5502789554692189,
- 0.5133569039707033,
- 0.3362423821397683
+ -0.01857018862028424,
+ 0.0002537307939991622,
+ 0.026255245755908162,
+ -0.14002517840807446,
+ 0.007539923757190536,
+ 0.0005148956492720897,
+ -0.014635091647530806,
+ 0.06804563234170548,
+ -0.23437188947026588,
+ 0.030885868901033564,
+ 0.09726183060198645,
+ 0.31288462827008423,
+ 0.4400921339962701,
+ 0.5041347107122008,
+ 0.4738153586482091,
+ 0.5040885767103095,
+ 0.4791021701552109,
+ 0.2977577122833359,
+ -0.6039422883904094
]
},
{
@@ -4161,24 +4406,92 @@
],
"y": [
0,
- 0.020825633130744352,
- 0.07834717398529038,
- 0.02713652331628011,
- 0.042658931442986735,
- 0.023898482283718794,
- 0.0834926351754879,
- 0.15178979748316862,
- 0.08857175291718634,
- 0.12179131433604357,
- 0.2553231178649492,
- 0.29240213618291233,
- 0.3537504590152744,
- 0.42519670129575154,
- 0.4963822470762823,
- 0.4939338299136029,
- 0.4459152976624514,
- 0.3959230304322153,
- 0.3589495840431508
+ 0.015420421521022359,
+ -0.09669349440683989,
+ -0.091541890239276,
+ -0.09831875648223753,
+ -0.15889189005527926,
+ -0.3238028751507137,
+ -0.3021861133013793,
+ -0.4153169108791584,
+ -0.288250420951932,
+ -0.2441834151882844,
+ -0.1861432088236132,
+ -0.1118765948442563,
+ -0.1428880299543885,
+ -0.15671367265554081,
+ -0.08498116194286852,
+ -0.0692509229950365,
+ -0.07606337302593434,
+ -0.060206992686251126,
+ -0.026142709807686404,
+ -0.00556563158054774,
+ -0.009866779945955653,
+ 0.009431355567630549,
+ 0.07078980046219653,
+ 0.07539372073236464,
+ 0.16207370357320372,
+ 0.11977356856616517,
+ 0.12460094770881808,
+ 0.011489106540859231,
+ 0.061297015648956545,
+ -0.03033636746102892,
+ -0.6020273802156646,
+ -0.7123812037659562,
+ -0.4920115140550281,
+ -0.4367331844041445,
+ -0.33679269706742865,
+ -0.3306531304903035,
+ -0.37268131892762646,
+ -0.36381325777868373,
+ -0.25952419260997384,
+ -0.18277481600651196,
+ -0.17652165525332084,
+ -0.26795985859146876,
+ -0.2356920143971257,
+ -0.14144053670126078,
+ -0.21659461937637747,
+ -0.12520989827404583,
+ -0.11843103026845993,
+ -0.05123309801836999,
+ 0.017778540707878085,
+ 0.019144138672787272,
+ 0.11485746525887455,
+ 0.1437046480208089,
+ 0.17703849961320217,
+ 0.21687140567779414,
+ 0.2412548538027845,
+ 0.26551717672687575,
+ 0.26894090065731174,
+ 0.16554721389655586,
+ 0.2553299706098052,
+ 0.192509989938916,
+ 0.25013794649499754,
+ 0.2830880334454756,
+ 0.2923163455026178,
+ 0.37782690751231596,
+ 0.39185952268808383,
+ 0.41063160050511543,
+ 0.47333263361815836,
+ 0.48657173784167357,
+ 0.5073973709724179,
+ 0.564918911826964,
+ 0.5137082611579536,
+ 0.5292306692846602,
+ 0.5104702201253923,
+ 0.5700643730171614,
+ 0.6383615353248421,
+ 0.5751434907588598,
+ 0.608363052177717,
+ 0.7418948557066227,
+ 0.7789738740245858,
+ 0.8403221968569479,
+ 0.911768439137425,
+ 0.9829539849179558,
+ 0.9805055677552764,
+ 0.9324870355041249,
+ 0.8824947682738887,
+ 0.8455213218848242
]
}
],
@@ -4197,6 +4510,11 @@
"line": {
"color": "#E5ECF6",
"width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
}
},
"type": "bar"
@@ -4208,6 +4526,11 @@
"line": {
"color": "#E5ECF6",
"width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
}
},
"type": "barpolar"
@@ -4406,9 +4729,10 @@
"histogram": [
{
"marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
}
},
"type": "histogram"
@@ -4544,11 +4868,10 @@
],
"scatter": [
{
- "marker": {
- "colorbar": {
- "outlinewidth": 0,
- "ticks": ""
- }
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
},
"type": "scatter"
}
@@ -4726,6 +5049,7 @@
"arrowhead": 0,
"arrowwidth": 1
},
+ "autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
@@ -5004,23 +5328,9 @@
}
},
"text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " "
+ " }) }; }); "
]
},
"metadata": {},
@@ -5076,7 +5382,7 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
@@ -5094,7 +5400,7 @@
},
{
"cell_type": "code",
- "execution_count": 41,
+ "execution_count": 70,
"metadata": {},
"outputs": [
{
@@ -5134,31 +5440,31 @@
" \n",
" \n",
" 2018-06-01 | \n",
- " -0.019689 | \n",
- " -0.017329 | \n",
- " -0.011352 | \n",
- " 0.020826 | \n",
+ " 0.02529 | \n",
+ " -0.01857 | \n",
+ " -0.006528 | \n",
+ " 0.015420 | \n",
" \n",
" \n",
" 2018-09-01 | \n",
- " 0.082438 | \n",
- " 0.020819 | \n",
- " 0.022277 | \n",
- " 0.057522 | \n",
+ " -0.018328 | \n",
+ " 0.018824 | \n",
+ " 0.016548 | \n",
+ " -0.112114 | \n",
" \n",
" \n",
" 2018-12-01 | \n",
- " 0.023566 | \n",
- " 0.023138 | \n",
- " 0.042968 | \n",
- " -0.051211 | \n",
+ " 0.026316 | \n",
+ " 0.026002 | \n",
+ " 0.024862 | \n",
+ " 0.005152 | \n",
" \n",
" \n",
" 2019-03-01 | \n",
- " -0.159733 | \n",
- " -0.168071 | \n",
- " -0.16827 | \n",
- " 0.015522 | \n",
+ " -0.214458 | \n",
+ " -0.16628 | \n",
+ " -0.164245 | \n",
+ " -0.006777 | \n",
" \n",
" \n",
"\n",
@@ -5167,13 +5473,13 @@
"text/plain": [
" Mean-Var Equally Min-Var SPX\n",
"2018-03-01 0 0 0 0.000000\n",
- "2018-06-01 -0.019689 -0.017329 -0.011352 0.020826\n",
- "2018-09-01 0.082438 0.020819 0.022277 0.057522\n",
- "2018-12-01 0.023566 0.023138 0.042968 -0.051211\n",
- "2019-03-01 -0.159733 -0.168071 -0.16827 0.015522"
+ "2018-06-01 0.02529 -0.01857 -0.006528 0.015420\n",
+ "2018-09-01 -0.018328 0.018824 0.016548 -0.112114\n",
+ "2018-12-01 0.026316 0.026002 0.024862 0.005152\n",
+ "2019-03-01 -0.214458 -0.16628 -0.164245 -0.006777"
]
},
- "execution_count": 41,
+ "execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
@@ -5184,7 +5490,7 @@
},
{
"cell_type": "code",
- "execution_count": 42,
+ "execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
@@ -5200,17 +5506,17 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 72,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "SPX 0.07556833348276859 0.1083325973664983\n",
- "Mean-Var 0.05512915349750012 0.2850812319781435\n",
- "Equally 0.07078786992416175 0.2729629255366962\n",
- "Min-Var 0.07598423294121649 0.26886702213764985\n"
+ "SPX -0.00522854196153727 0.13913436271090593\n",
+ "Mean-Var -0.12512200765811338 0.5267684626050293\n",
+ "Equally -0.12078845767808188 0.4858170592420743\n",
+ "Min-Var -0.12281867413974365 0.4877126592890122\n"
]
}
],
@@ -5223,8 +5529,10 @@
},
{
"cell_type": "code",
- "execution_count": 44,
- "metadata": {},
+ "execution_count": 75,
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [],
"source": [
"risk_free=0.015"
@@ -5232,7 +5540,7 @@
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
@@ -5244,17 +5552,17 @@
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 77,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "SPX Sharpe 0.5590961073134877\n",
- "Mean-Var Sharpe 0.14076392619411973\n",
- "Equally Sharpe 0.2043789273377414\n",
- "Min-Var Sharpe 0.22681931185296073\n"
+ "SPX Sharpe -0.1453885407415006\n",
+ "Mean-Var Sharpe -0.2660030309429834\n",
+ "Equally Sharpe -0.27950533044254594\n",
+ "Min-Var Sharpe -0.2825817036216689\n"
]
}
],
@@ -5268,28 +5576,101 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [],
"source": []
},
{
"cell_type": "code",
- "execution_count": 47,
- "metadata": {},
+ "execution_count": 78,
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [],
"source": [
- "insample_date=tradedate[1:51]"
+ "insample_date=tradedate"
]
},
{
"cell_type": "code",
- "execution_count": 48,
+ "execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "['2018-06-01',\n",
+ "['2001-03-01',\n",
+ " '2001-06-01',\n",
+ " '2001-09-01',\n",
+ " '2001-12-01',\n",
+ " '2002-03-01',\n",
+ " '2002-06-01',\n",
+ " '2002-09-01',\n",
+ " '2002-12-01',\n",
+ " '2003-03-01',\n",
+ " '2003-06-01',\n",
+ " '2003-09-01',\n",
+ " '2003-12-01',\n",
+ " '2004-03-01',\n",
+ " '2004-06-01',\n",
+ " '2004-09-01',\n",
+ " '2004-12-01',\n",
+ " '2005-03-01',\n",
+ " '2005-06-01',\n",
+ " '2005-09-01',\n",
+ " '2005-12-01',\n",
+ " '2006-03-01',\n",
+ " '2006-06-01',\n",
+ " '2006-09-01',\n",
+ " '2006-12-01',\n",
+ " '2007-03-01',\n",
+ " '2007-06-01',\n",
+ " '2007-09-01',\n",
+ " '2007-12-01',\n",
+ " '2008-03-01',\n",
+ " '2008-06-01',\n",
+ " '2008-09-01',\n",
+ " '2008-12-01',\n",
+ " '2009-03-01',\n",
+ " '2009-06-01',\n",
+ " '2009-09-01',\n",
+ " '2009-12-01',\n",
+ " '2010-03-01',\n",
+ " '2010-06-01',\n",
+ " '2010-09-01',\n",
+ " '2010-12-01',\n",
+ " '2011-03-01',\n",
+ " '2011-06-01',\n",
+ " '2011-09-01',\n",
+ " '2011-12-01',\n",
+ " '2012-03-01',\n",
+ " '2012-06-01',\n",
+ " '2012-09-01',\n",
+ " '2012-12-01',\n",
+ " '2013-03-01',\n",
+ " '2013-06-01',\n",
+ " '2013-09-01',\n",
+ " '2013-12-01',\n",
+ " '2014-03-01',\n",
+ " '2014-06-01',\n",
+ " '2014-09-01',\n",
+ " '2014-12-01',\n",
+ " '2015-03-01',\n",
+ " '2015-06-01',\n",
+ " '2015-09-01',\n",
+ " '2015-12-01',\n",
+ " '2016-03-01',\n",
+ " '2016-06-01',\n",
+ " '2016-09-01',\n",
+ " '2016-12-01',\n",
+ " '2017-03-01',\n",
+ " '2017-06-01',\n",
+ " '2017-09-01',\n",
+ " '2017-12-01',\n",
+ " '2018-03-01',\n",
+ " '2018-06-01',\n",
" '2018-09-01',\n",
" '2018-12-01',\n",
" '2019-03-01',\n",
@@ -5309,7 +5690,7 @@
" '2022-09-01']"
]
},
- "execution_count": 48,
+ "execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
@@ -5320,36 +5701,27 @@
},
{
"cell_type": "code",
- "execution_count": 49,
+ "execution_count": 80,
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- "2018-06-01 0.020826\n",
- "2018-09-01 0.057522\n",
- "2018-12-01 -0.051211\n",
- "2019-03-01 0.015522\n",
- "2019-06-01 -0.018760\n",
- "2019-09-01 0.059594\n",
- "2019-12-01 0.068297\n",
- "2020-03-01 -0.063218\n",
- "2020-06-01 0.033220\n",
- "2020-09-01 0.133532\n",
- "2020-12-01 0.037079\n",
- "2021-03-01 0.061348\n",
- "2021-06-01 0.071446\n",
- "2021-09-01 0.071186\n",
- "2021-12-01 -0.002448\n",
- "2022-03-01 -0.048019\n",
- "2022-06-01 -0.049992\n",
- "2022-09-01 -0.036973\n",
- "Name: SPX, dtype: float64"
- ]
- },
- "execution_count": 49,
- "metadata": {},
- "output_type": "execute_result"
+ "ename": "KeyError",
+ "evalue": "\"['2001-03-01', '2001-06-01', '2001-09-01', '2001-12-01', '2002-03-01', '2002-06-01', '2002-09-01', '2002-12-01', '2003-03-01', '2003-06-01', '2003-09-01', '2003-12-01', '2004-03-01', '2004-06-01', '2004-09-01', '2004-12-01', '2005-03-01', '2005-06-01', '2005-09-01', '2005-12-01', '2006-03-01', '2006-06-01', '2006-09-01', '2006-12-01', '2007-03-01', '2007-06-01', '2007-09-01', '2007-12-01', '2008-03-01', '2008-06-01', '2008-09-01', '2008-12-01', '2009-03-01', '2009-06-01', '2009-09-01', '2009-12-01', '2010-03-01', '2010-06-01', '2010-09-01', '2010-12-01', '2011-03-01', '2011-06-01', '2011-09-01', '2011-12-01', '2012-03-01', '2012-06-01', '2012-09-01', '2012-12-01', '2013-03-01', '2013-06-01', '2013-09-01', '2013-12-01', '2014-03-01', '2014-06-01', '2014-09-01', '2014-12-01', '2015-03-01', '2015-06-01', '2015-09-01', '2015-12-01', '2016-03-01', '2016-06-01', '2016-09-01', '2016-12-01', '2017-03-01', '2017-06-01', '2017-09-01', '2017-12-01'] not in index\"",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "\u001b[1;32m/home/wenbiaolin/20221117/fundamental_back_testing.ipynb Cell 65\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m quarter_return[\u001b[39m'\u001b[39;49m\u001b[39mSPX\u001b[39;49m\u001b[39m'\u001b[39;49m][insample_date]\n",
+ "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/series.py:984\u001b[0m, in \u001b[0;36mSeries.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 981\u001b[0m key \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39masarray(key, dtype\u001b[39m=\u001b[39m\u001b[39mbool\u001b[39m)\n\u001b[1;32m 982\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_values(key)\n\u001b[0;32m--> 984\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_get_with(key)\n",
+ "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/series.py:1024\u001b[0m, in \u001b[0;36mSeries._get_with\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1021\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39miloc[key]\n\u001b[1;32m 1023\u001b[0m \u001b[39m# handle the dup indexing case GH#4246\u001b[39;00m\n\u001b[0;32m-> 1024\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mloc[key]\n",
+ "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/indexing.py:967\u001b[0m, in \u001b[0;36m_LocationIndexer.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 964\u001b[0m axis \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39maxis \u001b[39mor\u001b[39;00m \u001b[39m0\u001b[39m\n\u001b[1;32m 966\u001b[0m maybe_callable \u001b[39m=\u001b[39m com\u001b[39m.\u001b[39mapply_if_callable(key, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj)\n\u001b[0;32m--> 967\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_getitem_axis(maybe_callable, axis\u001b[39m=\u001b[39;49maxis)\n",
+ "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/indexing.py:1191\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1188\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(key, \u001b[39m\"\u001b[39m\u001b[39mndim\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mand\u001b[39;00m key\u001b[39m.\u001b[39mndim \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m 1189\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mCannot index with multidimensional key\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m-> 1191\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_getitem_iterable(key, axis\u001b[39m=\u001b[39;49maxis)\n\u001b[1;32m 1193\u001b[0m \u001b[39m# nested tuple slicing\u001b[39;00m\n\u001b[1;32m 1194\u001b[0m \u001b[39mif\u001b[39;00m is_nested_tuple(key, labels):\n",
+ "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/indexing.py:1132\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_iterable\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1129\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_validate_key(key, axis)\n\u001b[1;32m 1131\u001b[0m \u001b[39m# A collection of keys\u001b[39;00m\n\u001b[0;32m-> 1132\u001b[0m keyarr, indexer \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_get_listlike_indexer(key, axis)\n\u001b[1;32m 1133\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj\u001b[39m.\u001b[39m_reindex_with_indexers(\n\u001b[1;32m 1134\u001b[0m {axis: [keyarr, indexer]}, copy\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m, allow_dups\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m\n\u001b[1;32m 1135\u001b[0m )\n",
+ "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/indexing.py:1327\u001b[0m, in \u001b[0;36m_LocIndexer._get_listlike_indexer\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1324\u001b[0m ax \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj\u001b[39m.\u001b[39m_get_axis(axis)\n\u001b[1;32m 1325\u001b[0m axis_name \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj\u001b[39m.\u001b[39m_get_axis_name(axis)\n\u001b[0;32m-> 1327\u001b[0m keyarr, indexer \u001b[39m=\u001b[39m ax\u001b[39m.\u001b[39;49m_get_indexer_strict(key, axis_name)\n\u001b[1;32m 1329\u001b[0m \u001b[39mreturn\u001b[39;00m keyarr, indexer\n",
+ "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py:5782\u001b[0m, in \u001b[0;36mIndex._get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m 5779\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 5780\u001b[0m keyarr, indexer, new_indexer \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_reindex_non_unique(keyarr)\n\u001b[0;32m-> 5782\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_raise_if_missing(keyarr, indexer, axis_name)\n\u001b[1;32m 5784\u001b[0m keyarr \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtake(indexer)\n\u001b[1;32m 5785\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(key, Index):\n\u001b[1;32m 5786\u001b[0m \u001b[39m# GH 42790 - Preserve name from an Index\u001b[39;00m\n",
+ "File \u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py:5845\u001b[0m, in \u001b[0;36mIndex._raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m 5842\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mNone of [\u001b[39m\u001b[39m{\u001b[39;00mkey\u001b[39m}\u001b[39;00m\u001b[39m] are in the [\u001b[39m\u001b[39m{\u001b[39;00maxis_name\u001b[39m}\u001b[39;00m\u001b[39m]\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 5844\u001b[0m not_found \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(ensure_index(key)[missing_mask\u001b[39m.\u001b[39mnonzero()[\u001b[39m0\u001b[39m]]\u001b[39m.\u001b[39munique())\n\u001b[0;32m-> 5845\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mnot_found\u001b[39m}\u001b[39;00m\u001b[39m not in index\u001b[39m\u001b[39m\"\u001b[39m)\n",
+ "\u001b[0;31mKeyError\u001b[0m: \"['2001-03-01', '2001-06-01', '2001-09-01', '2001-12-01', '2002-03-01', '2002-06-01', '2002-09-01', '2002-12-01', '2003-03-01', '2003-06-01', '2003-09-01', '2003-12-01', '2004-03-01', '2004-06-01', '2004-09-01', '2004-12-01', '2005-03-01', '2005-06-01', '2005-09-01', '2005-12-01', '2006-03-01', '2006-06-01', '2006-09-01', '2006-12-01', '2007-03-01', '2007-06-01', '2007-09-01', '2007-12-01', '2008-03-01', '2008-06-01', '2008-09-01', '2008-12-01', '2009-03-01', '2009-06-01', '2009-09-01', '2009-12-01', '2010-03-01', '2010-06-01', '2010-09-01', '2010-12-01', '2011-03-01', '2011-06-01', '2011-09-01', '2011-12-01', '2012-03-01', '2012-06-01', '2012-09-01', '2012-12-01', '2013-03-01', '2013-06-01', '2013-09-01', '2013-12-01', '2014-03-01', '2014-06-01', '2014-09-01', '2014-12-01', '2015-03-01', '2015-06-01', '2015-09-01', '2015-12-01', '2016-03-01', '2016-06-01', '2016-09-01', '2016-12-01', '2017-03-01', '2017-06-01', '2017-09-01', '2017-12-01'] not in index\""
+ ]
}
],
"source": [
@@ -5358,21 +5730,21 @@
},
{
"cell_type": "code",
- "execution_count": 50,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "SPX 0.0797665742318113 0.11107500724034111\n",
- "Mean-Var 0.05819188424736124 0.2932657439273469\n",
- "Equally 0.07472052936439295 0.28073803260205243\n",
- "Min-Var 0.08020557921572852 0.2764998639318288\n",
- "SPX Sharpe 0.5830886338964726\n",
- "Mean-Var Sharpe 0.14727899572908018\n",
- "Equally Sharpe 0.21272689279349302\n",
- "Min-Var Sharpe 0.23582499567451864\n"
+ "SPX 0.001531531093822985 0.21955274735546973\n",
+ "Mean-Var 0.2565791830967157 0.29155309241664823\n",
+ "Equally 0.26616405706243595 0.3011567507349648\n",
+ "Min-Var 0.26844700713719916 0.29823334992044453\n",
+ "SPX Sharpe -0.061345025595925314\n",
+ "Mean-Var Sharpe 0.828594137329483\n",
+ "Equally Sharpe 0.8339977651155983\n",
+ "Min-Var Sharpe 0.8498278519314074\n"
]
}
],
@@ -5402,21 +5774,16 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3.8.10 64-bit",
"language": "python",
"name": "python3"
},
@@ -5430,7 +5797,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.7"
+ "version": "3.8.10"
},
"vscode": {
"interpreter": {
diff --git a/ml_model.py b/ml_model.py
index 50f364033..dd9e467b9 100644
--- a/ml_model.py
+++ b/ml_model.py
@@ -18,14 +18,18 @@
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV,RandomizedSearchCV
-from keras.models import Sequential
-from keras.layers import Dense
-from keras.layers import LSTM
-from keras.layers import Dropout
+
+from xgboost import XGBRegressor
+from lightgbm import LGBMRegressor
+import time
import os
import errno
+from multiprocessing import cpu_count
+
+# cpu_count() - 1 leaves one core unused, but evaluates to 0 on a single-core
+# host. A zero budget makes `n_cpus // n_jobs_per_model` in the training
+# functions raise ZeroDivisionError, so never go below one worker.
+n_cpus = max(1, cpu_count() - 1)
+
def prepare_rolling_train(df,features_column,label_column,date_column,unique_datetime,testing_windows,first_trade_date_index, max_rolling_window_index,current_index):
if current_index <=max_rolling_window_index:
@@ -117,9 +121,12 @@ def train_random_forest(X_train, y_train):
# scoring_method = 'neg_mean_absolute_error'
scoring_method = 'neg_mean_squared_error'
#scoring_method = 'neg_mean_squared_log_error'
-
+ n_models = 1
+ for key, val in random_grid.items():
+ n_models *= len(val)
+ n_jobs_per_model = min(max(1, n_cpus//n_models), n_cpus)
# my_cv_rf = TimeSeriesSplit(n_splits=5).split(X_train_rf)
- rf = RandomForestRegressor(random_state=42)
+ rf = RandomForestRegressor(random_state=42, n_jobs= n_jobs_per_model)
#RandomizedSearchCV
#randomforest_regressor = RandomizedSearchCV(estimator=rf,
# param_distributions=random_grid,
@@ -132,7 +139,7 @@ def train_random_forest(X_train, y_train):
randomforest_regressor = GridSearchCV(estimator=rf,
param_grid=random_grid,
cv=3,
- n_jobs=-1,
+ n_jobs=n_cpus // n_jobs_per_model,
scoring=scoring_method,
verbose=0)
@@ -171,18 +178,24 @@ def train_svm(X_train, y_train):
return model
-def train_gbm(X_train, y_train):
- gbm = GradientBoostingRegressor(random_state = 42)
+def train_lightgbm(X_train, y_train):
+
+
# model = gbm.fit(X_train, y_train)
param_grid_gbm = {'learning_rate': [0.1, 0.01, 0.001], 'n_estimators': [100, 250, 500,1000]}
+ n_models = 1
+ for key, val in param_grid_gbm.items():
+ n_models *= len(val)
+ n_jobs_per_model = min(max(1, n_cpus//n_models), n_cpus)
+ lightgbm = LGBMRegressor(random_state = 42, n_jobs=n_jobs_per_model)
# scoring_method = 'r2'
# scoring_method = 'explained_variance'
# scoring_method = 'neg_mean_absolute_error'
scoring_method = 'neg_mean_squared_error'
#scoring_method = 'neg_mean_squared_log_error'
- gbm_regressor = GridSearchCV(estimator=gbm, param_grid=param_grid_gbm,
- cv=3, n_jobs=-1, scoring=scoring_method, verbose=0)
+ gbm_regressor = GridSearchCV(estimator=lightgbm, param_grid=param_grid_gbm,
+ cv=3, n_jobs=n_cpus // n_jobs_per_model, scoring=scoring_method, verbose=0)
gbm_regressor.fit(X_train, y_train)
model = gbm_regressor.best_estimator_
@@ -195,7 +208,31 @@ def train_gbm(X_train, y_train):
+def train_xgb(X_train, y_train):
+    """Grid-search an XGBoost regressor and return the best fitted estimator.
+
+    Every combination of ``learning_rate`` and ``n_estimators`` is scored
+    with 3-fold cross-validation on negative mean squared error. The
+    module-level ``n_cpus`` budget is split between the threads used inside
+    each XGBoost model and the number of parallel grid-search workers.
+
+    Args:
+        X_train: training features, passed unchanged to ``GridSearchCV.fit``.
+        y_train: training targets, passed unchanged to ``GridSearchCV.fit``.
+
+    Returns:
+        The ``best_estimator_`` selected by the grid search.
+    """
+    param_grid = {'learning_rate': [0.1, 0.01, 0.001],
+                  'n_estimators': [100, 250, 500, 1000]}
+
+    # Number of candidate models in the grid (product of the option counts).
+    n_models = 1
+    for options in param_grid.values():
+        n_models *= len(options)
+
+    # Clamp the CPU budget to at least 1 so that neither n_jobs value can be
+    # 0 and the integer division below cannot raise ZeroDivisionError.
+    available_cpus = max(1, n_cpus)
+    n_jobs_per_model = max(1, available_cpus // n_models)
+    n_search_jobs = max(1, available_cpus // n_jobs_per_model)
+
+    xgb = XGBRegressor(random_state=42, n_jobs=n_jobs_per_model)
+    scoring_method = 'neg_mean_squared_error'
+    xgb_regressor = GridSearchCV(estimator=xgb, param_grid=param_grid,
+                                 cv=3, n_jobs=n_search_jobs,
+                                 scoring=scoring_method, verbose=0)
+    xgb_regressor.fit(X_train, y_train)
+    return xgb_regressor.best_estimator_
def train_ada(X_train, y_train):
ada = AdaBoostRegressor()
@@ -255,18 +292,17 @@ def run_4model(df,features_column, label_column,date_column,tic_column,
max_rolling_window_index=44):
## initialize all the result tables
## need date as index and unique tic name as columns
- df_predict_lr = pd.DataFrame(columns=unique_ticker, index=trade_date)
df_predict_rf = pd.DataFrame(columns=unique_ticker, index=trade_date)
- df_predict_ridge = pd.DataFrame(columns=unique_ticker, index=trade_date)
df_predict_gbm = pd.DataFrame(columns=unique_ticker, index=trade_date)
-
+ df_predict_xgb = pd.DataFrame(columns=unique_ticker, index=trade_date)
df_predict_best = pd.DataFrame(columns=unique_ticker, index=trade_date)
df_best_model_name = pd.DataFrame(columns=['model_name'], index=trade_date)
evaluation_record = {}
# first trade date is 1995-06-01
# fist_trade_date_index = 20
# testing_windows = 6
-
+ import re
+ df = df.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
for i in range(first_trade_date_index, len(unique_datetime)):
try:
# prepare training data
@@ -303,33 +339,38 @@ def run_4model(df,features_column, label_column,date_column,tic_column,
current_index=i)
# Training
- lr_model = train_linear_regression(X_train, y_train)
+ # lr_model = train_linear_regression(X_train, y_train)
+
+ t = time.perf_counter()
+ xgb_model = train_xgb(X_train, y_train)
+ print(f"xgb:{time.perf_counter() - t}s")
+ t = time.perf_counter()
+ gbm_model = train_lightgbm(X_train, y_train)
+ print(f"gbm:{time.perf_counter() - t}s")
+ t =time.perf_counter()
rf_model = train_random_forest(X_train, y_train)
- ridge_model = train_ridge(X_train, y_train)
- gbm_model = train_gbm(X_train, y_train)
+ print(f"rf:{time.perf_counter() - t}s")
+ # ridge_model = train_ridge(X_train, y_train)
+
# Validation
- lr_eval = evaluate_model(lr_model, X_test, y_test)
rf_eval = evaluate_model(rf_model, X_test, y_test)
- ridge_eval = evaluate_model(ridge_model, X_test, y_test)
- gbm_eval = evaluate_model(gbm_model, X_test, y_test)
-
+ xgb_eval = evaluate_model(xgb_model, X_test, y_test)
+ gbm_eval = evaluate_model(gbm_model, X_test ,y_test)
# Trading
- y_trade_lr = lr_model.predict(X_trade)
+
y_trade_rf = rf_model.predict(X_trade)
- y_trade_ridge = ridge_model.predict(X_trade)
- y_trade_gbm = gbm_model.predict(X_trade)
-
-
+ y_trade_xgb = xgb_model.predict(X_trade)
+ y_trade_gbm = gbm_model.predict(X_trade)
# Decide the best model
- eval_data = [[lr_eval, y_trade_lr],
+ eval_data = [
[rf_eval, y_trade_rf] ,
- [ridge_eval, y_trade_ridge],
+ [xgb_eval, y_trade_xgb],
[gbm_eval, y_trade_gbm]
]
eval_table = pd.DataFrame(eval_data, columns=['model_eval', 'model_predict_return'],
- index=['lr', 'rf','ridge','gbm'])
+ index=['rf', 'xgb', 'gbm'])
evaluation_record[unique_datetime[i]]=eval_table
@@ -345,11 +386,9 @@ def run_4model(df,features_column, label_column,date_column,tic_column,
df_best_model_name.loc[unique_datetime[i]] = best_model_name
# Prepare Predicted Return table
- append_return_table(df_predict_lr, unique_datetime, y_trade_lr, trade_tic, current_index=i)
append_return_table(df_predict_rf, unique_datetime, y_trade_rf, trade_tic, current_index=i)
- append_return_table(df_predict_ridge, unique_datetime, y_trade_ridge, trade_tic, current_index=i)
+ append_return_table(df_predict_xgb, unique_datetime, y_trade_xgb, trade_tic, current_index=i)
append_return_table(df_predict_gbm, unique_datetime, y_trade_gbm, trade_tic, current_index=i)
-
append_return_table(df_predict_best, unique_datetime, y_trade_best, trade_tic, current_index=i)
print('Trade Date: ', unique_datetime[i])
@@ -357,10 +396,10 @@ def run_4model(df,features_column, label_column,date_column,tic_column,
except Exception:
traceback.print_exc()
df_evaluation = get_model_evaluation_table(evaluation_record,trade_date)
- return (df_predict_lr,
+ return (
df_predict_rf,
- df_predict_ridge,
df_predict_gbm,
+ df_predict_xgb,
df_predict_best,
df_best_model_name,
evaluation_record,
@@ -374,20 +413,19 @@ def get_model_evaluation_table(evaluation_record,trade_date):
evaluation_list.append(evaluation_record[d]['model_eval'].values)
except:
print('error')
- df_evaluation = pd.DataFrame(evaluation_list,columns = ['linear_regression', 'random_forest','ridge','gbm'])
+ df_evaluation = pd.DataFrame(evaluation_list,columns = ['rf', 'xgb', 'gbm'])
df_evaluation.index = trade_date
return df_evaluation
def save_model_result(sector_result,sector_name):
- df_predict_lr = sector_result[0].astype(np.float64)
- df_predict_rf = sector_result[1].astype(np.float64)
- df_predict_ridge = sector_result[2].astype(np.float64)
- df_predict_gbm = sector_result[3].astype(np.float64)
- df_predict_best = sector_result[4].astype(np.float64)
-
- df_best_model_name = sector_result[5]
- df_evaluation_score = sector_result[6]
- df_model_score = sector_result[7]
+ df_predict_rf = sector_result[0].astype(np.float64)
+ df_predict_gbm = sector_result[1].astype(np.float64)
+ df_predict_xgb = sector_result[2].astype(np.float64)
+ df_predict_best = sector_result[3].astype(np.float64)
+
+ df_best_model_name = sector_result[4]
+ df_evaluation_score = sector_result[5]
+ df_model_score = sector_result[6]
@@ -400,10 +438,9 @@ def save_model_result(sector_result,sector_name):
raise
- df_predict_lr.to_csv('results/'+sector_name+'/df_predict_lr.csv')
df_predict_rf.to_csv('results/'+sector_name+'/df_predict_rf.csv')
- df_predict_ridge.to_csv('results/'+sector_name+'/df_predict_ridge.csv')
df_predict_gbm.to_csv('results/'+sector_name+'/df_predict_gbm.csv')
+ df_predict_xgb.to_csv('results/'+sector_name+'/df_predict_xgb.csv')
df_predict_best.to_csv('results/'+sector_name+'/df_predict_best.csv')
df_best_model_name.to_csv('results/'+sector_name+'/df_best_model_name.csv')
#df_evaluation_score.to_csv('results/'+sector_name+'/df_evaluation_score.csv')
| |