From 7cb9f41d9013a02d2afbc1bac9d3129c845d2195 Mon Sep 17 00:00:00 2001 From: Ogaday Date: Wed, 31 Oct 2018 18:27:39 +0000 Subject: [PATCH 1/5] Doc: Add setup instructions to README --- README.md | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 494c576..1506664 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,29 @@ # Blog-Posts -This is the Repo for the Notebooks for our Blog Posts + +This is the Repo for the Notebooks for our Blog Posts. + +## Setup & Running + +To run notebooks in this repo, you'll need to recreate the kernel used to +create them. You can do this quite simply. First, create the conda env by +navigating to the project directory and running: + +``` +$ conda env create -f env.yml +``` + +Then you'll need to register this environment as an ipython kernel [1]. Use the +following command: + +``` +$ conda activate auto-arima +(auto-arima) $ python -m ipykernel install --user --name auto-arima --display-name "Auto-Arima (Py3)" +``` + +Now you'll be able to run the notebooks via Jupyter notebook or Jupyterhub. + +If you don't have `conda` installed, you can get it with the `miniconda` +distribtution [2]. 
+ +[1]: https://ipython.readthedocs.io/en/stable/install/kernel_install.html +[2]: https://conda.io/miniconda.html From 22661cefe49314210ccd52296f7438bf014b44e4 Mon Sep 17 00:00:00 2001 From: Ogaday Date: Wed, 31 Oct 2018 18:33:38 +0000 Subject: [PATCH 2/5] Feat: Add environment file --- Forecasting a Time Series in Python.ipynb | 6 +++--- env.yml | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 env.yml diff --git a/Forecasting a Time Series in Python.ipynb b/Forecasting a Time Series in Python.ipynb index e082305..b9f56ed 100644 --- a/Forecasting a Time Series in Python.ipynb +++ b/Forecasting a Time Series in Python.ipynb @@ -1243,9 +1243,9 @@ "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Auto-Arima (Py3)", "language": "python", - "name": "python3" + "name": "auto-arima" }, "language_info": { "codemirror_mode": { @@ -1257,7 +1257,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.1" + "version": "3.6.6" } }, "nbformat": 4, diff --git a/env.yml b/env.yml new file mode 100644 index 0000000..d10fe4f --- /dev/null +++ b/env.yml @@ -0,0 +1,14 @@ +name: auto-arima +channels: + - defaults +dependencies: + - ipykernel=5.1.0 + - matplotlib=3.0.1 + - pandas=0.23.4 + - python=3.6 + - scikit-learn=0.20.0 + - statsmodels=0.9.0 + - pip: + - cufflinks==0.14.5 + - plotly==3.3.0 + - pyramid-arima==0.8.1 From 5b0d76318e11806d14a6de379abfb4cf4e5b2d34 Mon Sep 17 00:00:00 2001 From: Ogaday Date: Wed, 31 Oct 2018 18:38:19 +0000 Subject: [PATCH 3/5] Doc: Update link formatting --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1506664..7e6488c 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ navigating to the project directory and running: $ conda env create -f env.yml ``` -Then you'll need to register this environment as an ipython kernel [1]. 
Use the +Then you'll need to register this environment as an ipython kernel ([1]). Use the following command: ``` @@ -23,7 +23,7 @@ $ conda activate Now you'll be able to run the notebooks via Jupyter notebook or Jupyterhub. If you don't have `conda` installed, you can get it with the `miniconda` -distribtution [2]. +distribution ([2]). [1]: https://ipython.readthedocs.io/en/stable/install/kernel_install.html [2]: https://conda.io/miniconda.html From 41204caadd4c6d3664df4f8f43d33eec8c05c607 Mon Sep 17 00:00:00 2001 From: Ogaday Date: Wed, 31 Oct 2018 21:14:52 +0000 Subject: [PATCH 4/5] Fix: Prevent tuning of parameters on whole dataset As `auto_arima` is run on the whole dataset, there is leakage of algorithm parameters and the accuracy score for the test set might be inflated as the test set was used to choose the ARIMA parameters. It's for this same reason that Kaggle has a private leaderboard etc. In order to prevent this I've simply split the data between train and test before tuning the parameters. --- Forecasting a Time Series in Python.ipynb | 467 ++++++++++------------ 1 file changed, 207 insertions(+), 260 deletions(-) diff --git a/Forecasting a Time Series in Python.ipynb b/Forecasting a Time Series in Python.ipynb index b9f56ed..d112ea8 100644 --- a/Forecasting a Time Series in Python.ipynb +++ b/Forecasting a Time Series in Python.ipynb @@ -27,18 +27,18 @@ "data": { "text/html": [ "
\n", - "\n", "\n", " \n", @@ -133,9 +133,7 @@ { "cell_type": "code", "execution_count": 3, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "data.index = pd.to_datetime(data.index)" @@ -150,18 +148,18 @@ "data": { "text/html": [ "
\n", - "\n", "
\n", " \n", @@ -261,18 +259,18 @@ "data": { "text/html": [ "
\n", - "\n", "
\n", " \n", @@ -315,9 +313,7 @@ { "cell_type": "code", "execution_count": 7, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "data.columns = ['Energy Production']" @@ -332,18 +328,18 @@ "data": { "text/html": [ "
\n", - "\n", "
\n", " \n", @@ -402,10 +398,8 @@ }, { "cell_type": "code", - "execution_count": 49, - "metadata": { - "collapsed": true - }, + "execution_count": 9, + "metadata": {}, "outputs": [], "source": [ "import plotly\n", @@ -465,100 +459,6 @@ "outputs": [], "source": [] }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Marcial\\Anaconda3\\lib\\site-packages\\statsmodels\\compat\\pandas.py:56: FutureWarning:\n", - "\n", - "The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.\n", - "\n" - ] - } - ], - "source": [ - "from pyramid.arima import auto_arima" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**he AIC measures how well a model fits the data while taking into account the overall complexity of the model. A model that fits the data very well while using lots of features will be assigned a larger AIC score than a model that uses fewer features to achieve the same goodness-of-fit. Therefore, we are interested in finding the model that yields the lowest AIC value." 
- ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=1782.527, BIC=1802.447, Fit time=0.866 seconds\n", - "Fit ARIMA: order=(0, 1, 0) seasonal_order=(0, 1, 0, 12); AIC=nan, BIC=nan, Fit time=nan seconds\n", - "Fit ARIMA: order=(1, 1, 0) seasonal_order=(1, 1, 0, 12); AIC=1942.040, BIC=1957.976, Fit time=0.238 seconds\n", - "Fit ARIMA: order=(0, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=1837.289, BIC=1853.224, Fit time=0.361 seconds\n", - "Fit ARIMA: order=(1, 1, 1) seasonal_order=(1, 1, 1, 12); AIC=1783.875, BIC=1807.778, Fit time=1.434 seconds\n", - "Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 0, 12); AIC=1920.884, BIC=1936.820, Fit time=0.451 seconds\n", - "Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 2, 12); AIC=1784.212, BIC=1808.116, Fit time=3.491 seconds\n", - "Fit ARIMA: order=(1, 1, 1) seasonal_order=(1, 1, 2, 12); AIC=1781.900, BIC=1809.788, Fit time=3.586 seconds\n", - "Fit ARIMA: order=(0, 1, 1) seasonal_order=(1, 1, 2, 12); AIC=1837.164, BIC=1861.067, Fit time=1.287 seconds\n", - "Fit ARIMA: order=(2, 1, 1) seasonal_order=(1, 1, 2, 12); AIC=1782.648, BIC=1814.520, Fit time=4.348 seconds\n", - "Fit ARIMA: order=(1, 1, 0) seasonal_order=(1, 1, 2, 12); AIC=1852.587, BIC=1876.490, Fit time=1.008 seconds\n", - "Fit ARIMA: order=(1, 1, 2) seasonal_order=(1, 1, 2, 12); AIC=1781.940, BIC=1813.811, Fit time=4.222 seconds\n", - "Fit ARIMA: order=(0, 1, 0) seasonal_order=(1, 1, 2, 12); AIC=1864.184, BIC=1884.103, Fit time=1.035 seconds\n", - "Fit ARIMA: order=(2, 1, 2) seasonal_order=(1, 1, 2, 12); AIC=1782.722, BIC=1818.578, Fit time=5.144 seconds\n", - "Fit ARIMA: order=(1, 1, 1) seasonal_order=(2, 1, 2, 12); AIC=1772.539, BIC=1804.410, Fit time=4.551 seconds\n", - "Fit ARIMA: order=(1, 1, 1) seasonal_order=(2, 1, 1, 12); AIC=1771.295, BIC=1799.182, Fit time=2.270 seconds\n", - "Fit ARIMA: 
order=(1, 1, 1) seasonal_order=(1, 1, 0, 12); AIC=1870.049, BIC=1889.969, Fit time=0.974 seconds\n", - "Fit ARIMA: order=(0, 1, 1) seasonal_order=(2, 1, 1, 12); AIC=1825.210, BIC=1849.114, Fit time=0.999 seconds\n", - "Fit ARIMA: order=(2, 1, 1) seasonal_order=(2, 1, 1, 12); AIC=1772.010, BIC=1803.881, Fit time=3.391 seconds\n", - "Fit ARIMA: order=(1, 1, 0) seasonal_order=(2, 1, 1, 12); AIC=1842.551, BIC=1866.454, Fit time=0.808 seconds\n", - "Fit ARIMA: order=(1, 1, 2) seasonal_order=(2, 1, 1, 12); AIC=1771.612, BIC=1803.484, Fit time=4.359 seconds\n", - "Fit ARIMA: order=(0, 1, 0) seasonal_order=(2, 1, 1, 12); AIC=1855.606, BIC=1875.526, Fit time=0.641 seconds\n", - "Fit ARIMA: order=(2, 1, 2) seasonal_order=(2, 1, 1, 12); AIC=1773.049, BIC=1808.904, Fit time=5.352 seconds\n", - "Fit ARIMA: order=(1, 1, 1) seasonal_order=(2, 1, 0, 12); AIC=1813.388, BIC=1837.291, Fit time=3.069 seconds\n", - "Total fit time: 53.889 seconds\n" - ] - } - ], - "source": [ - "stepwise_model = auto_arima(data, start_p=1, start_q=1,\n", - " max_p=3, max_q=3, m=12,\n", - " start_P=0, seasonal=True,\n", - " d=1, D=1, trace=True,\n", - " error_action='ignore', \n", - " suppress_warnings=True, \n", - " stepwise=True) " - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1771.2948217037836" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "stepwise_model.aic()" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -568,25 +468,25 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", - "\n", "
\n", " \n", @@ -634,7 +534,7 @@ "1985-05-01 55.3151" ] }, - "execution_count": 32, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -645,7 +545,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -657,7 +557,7 @@ "Data columns (total 1 columns):\n", "Energy Production 397 non-null float64\n", "dtypes: float64(1)\n", - "memory usage: 26.2 KB\n" + "memory usage: 6.2 KB\n" ] } ], @@ -674,10 +574,8 @@ }, { "cell_type": "code", - "execution_count": 34, - "metadata": { - "collapsed": true - }, + "execution_count": 12, + "metadata": {}, "outputs": [], "source": [ "train = data.loc['1985-01-01':'2016-12-01']" @@ -685,25 +583,25 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", - "\n", "
\n", " \n", @@ -751,7 +649,7 @@ "2016-12-01 112.7694" ] }, - "execution_count": 35, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -762,10 +660,8 @@ }, { "cell_type": "code", - "execution_count": 36, - "metadata": { - "collapsed": true - }, + "execution_count": 14, + "metadata": {}, "outputs": [], "source": [ "test = data.loc['2015-01-01':]" @@ -773,25 +669,25 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", - "\n", "
\n", " \n", @@ -839,7 +735,7 @@ "2015-05-01 91.0930" ] }, - "execution_count": 37, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -850,25 +746,25 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", - "\n", "
\n", " \n", @@ -916,7 +812,7 @@ "2018-01-01 129.4048" ] }, - "execution_count": 38, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -927,7 +823,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -936,7 +832,7 @@ "37" ] }, - "execution_count": 39, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -947,59 +843,124 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from pyramid.arima import auto_arima" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**he AIC measures how well a model fits the data while taking into account the overall complexity of the model. A model that fits the data very well while using lots of features will be assigned a larger AIC score than a model that uses fewer features to achieve the same goodness-of-fit. Therefore, we are interested in finding the model that yields the lowest AIC value." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=1686.946, BIC=1706.527, Fit time=1.348 seconds\n", + "Fit ARIMA: order=(0, 1, 0) seasonal_order=(0, 1, 0, 12); AIC=1897.389, BIC=1905.221, Fit time=0.023 seconds\n", + "Fit ARIMA: order=(1, 1, 0) seasonal_order=(1, 1, 0, 12); AIC=1838.277, BIC=1853.942, Fit time=0.281 seconds\n", + "Fit ARIMA: order=(0, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=1745.226, BIC=1760.891, Fit time=0.243 seconds\n", + "Fit ARIMA: order=(1, 1, 1) seasonal_order=(1, 1, 1, 12); AIC=1687.289, BIC=1710.786, Fit time=1.478 seconds\n", + "Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 0, 12); AIC=1813.531, BIC=1829.196, Fit time=0.433 seconds\n", + "Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 2, 12); AIC=1686.349, BIC=1709.846, Fit time=3.457 seconds\n", + "Fit ARIMA: order=(0, 1, 1) seasonal_order=(0, 1, 2, 12); AIC=1744.461, BIC=1764.042, Fit time=1.203 seconds\n", + "Fit ARIMA: order=(2, 1, 1) seasonal_order=(0, 1, 2, 12); AIC=1686.471, BIC=1713.884, Fit time=4.320 seconds\n", + "Fit ARIMA: order=(1, 1, 0) seasonal_order=(0, 1, 2, 12); AIC=1758.825, BIC=1778.406, Fit time=0.884 seconds\n", + "Fit ARIMA: order=(1, 1, 2) seasonal_order=(0, 1, 2, 12); AIC=1685.864, BIC=1713.277, Fit time=4.383 seconds\n", + "Fit ARIMA: order=(2, 1, 3) seasonal_order=(0, 1, 2, 12); AIC=1689.714, BIC=1724.960, Fit time=6.895 seconds\n", + "Fit ARIMA: order=(1, 1, 2) seasonal_order=(1, 1, 2, 12); AIC=1683.989, BIC=1715.319, Fit time=3.750 seconds\n", + "Fit ARIMA: order=(1, 1, 2) seasonal_order=(1, 1, 1, 12); AIC=1686.996, BIC=1714.410, Fit time=2.271 seconds\n", + "Fit ARIMA: order=(1, 1, 2) seasonal_order=(0, 1, 1, 12); AIC=1686.893, BIC=1710.390, Fit time=1.553 seconds\n", + "Fit ARIMA: order=(0, 1, 2) seasonal_order=(1, 1, 2, 12); AIC=1695.321, BIC=1722.734, Fit time=3.538 seconds\n", + 
"Fit ARIMA: order=(2, 1, 2) seasonal_order=(1, 1, 2, 12); AIC=1687.095, BIC=1722.340, Fit time=6.486 seconds\n", + "Fit ARIMA: order=(1, 1, 1) seasonal_order=(1, 1, 2, 12); AIC=1684.654, BIC=1712.068, Fit time=8.034 seconds\n", + "Fit ARIMA: order=(1, 1, 3) seasonal_order=(1, 1, 2, 12); AIC=1685.565, BIC=1720.811, Fit time=7.571 seconds\n", + "Fit ARIMA: order=(0, 1, 1) seasonal_order=(1, 1, 2, 12); AIC=1743.820, BIC=1767.317, Fit time=1.797 seconds\n", + "Fit ARIMA: order=(2, 1, 3) seasonal_order=(1, 1, 2, 12); AIC=1687.572, BIC=1726.734, Fit time=7.718 seconds\n", + "Fit ARIMA: order=(1, 1, 2) seasonal_order=(2, 1, 2, 12); AIC=1680.474, BIC=1715.720, Fit time=6.542 seconds\n", + "Fit ARIMA: order=(1, 1, 2) seasonal_order=(2, 1, 1, 12); AIC=1678.805, BIC=1710.134, Fit time=5.650 seconds\n", + "Fit ARIMA: order=(1, 1, 2) seasonal_order=(1, 1, 0, 12); AIC=1765.525, BIC=1789.022, Fit time=1.937 seconds\n", + "Fit ARIMA: order=(0, 1, 2) seasonal_order=(2, 1, 1, 12); AIC=1689.420, BIC=1716.833, Fit time=2.688 seconds\n", + "Fit ARIMA: order=(2, 1, 2) seasonal_order=(2, 1, 1, 12); AIC=1680.442, BIC=1715.687, Fit time=6.800 seconds\n", + "Fit ARIMA: order=(1, 1, 1) seasonal_order=(2, 1, 1, 12); AIC=1678.863, BIC=1706.276, Fit time=5.015 seconds\n", + "Fit ARIMA: order=(1, 1, 3) seasonal_order=(2, 1, 1, 12); AIC=1681.664, BIC=1716.910, Fit time=5.950 seconds\n", + "Fit ARIMA: order=(0, 1, 1) seasonal_order=(2, 1, 1, 12); AIC=1735.788, BIC=1759.285, Fit time=1.191 seconds\n", + "Fit ARIMA: order=(2, 1, 3) seasonal_order=(2, 1, 1, 12); AIC=1681.895, BIC=1721.057, Fit time=7.162 seconds\n", + "Fit ARIMA: order=(1, 1, 2) seasonal_order=(2, 1, 0, 12); AIC=1712.516, BIC=1739.930, Fit time=4.818 seconds\n", + "Total fit time: 115.432 seconds\n" + ] + } + ], + "source": [ + "stepwise_model = auto_arima(train, start_p=1, start_q=1,\n", + " max_p=3, max_q=3, m=12,\n", + " start_P=0, seasonal=True,\n", + " d=1, D=1, trace=True,\n", + " error_action='ignore', \n", + " 
suppress_warnings=True, \n", + " stepwise=True) " + ] + }, + { + "cell_type": "code", + "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "ARIMA(callback=None, disp=0, maxiter=50, method=None, order=(1, 1, 1),\n", - " out_of_sample_size=0, scoring='mse', scoring_args={},\n", - " seasonal_order=(2, 1, 1, 12), solver='lbfgs', start_params=None,\n", - " suppress_warnings=True, transparams=True, trend='c')" + "1678.8046078286538" ] }, - "execution_count": 40, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "stepwise_model.fit(train)" + "stepwise_model.aic()" ] }, { "cell_type": "code", - "execution_count": 41, - "metadata": { - "collapsed": true - }, + "execution_count": 21, + "metadata": {}, "outputs": [], "source": [ - "future_forecast = stepwise_model.predict(n_periods=37)" + "future_forecast = stepwise_model.predict(n_periods=len(test))" ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([ 121.0069841 , 110.05829773, 100.66214128, 90.6704282 ,\n", - " 92.16164339, 103.22814008, 112.50690673, 112.11184857,\n", - " 101.04339558, 92.07696292, 95.82735302, 111.26301564,\n", - " 120.21587944, 111.32815023, 102.16625906, 90.55576703,\n", - " 92.13024541, 102.88613935, 111.8729164 , 111.06722082,\n", - " 100.84852305, 92.07136347, 95.82277211, 109.23003471,\n", - " 119.35147539, 110.56205836, 100.99051798, 90.20673887,\n", - " 91.75667578, 102.973546 , 112.20965839, 111.68458911,\n", - " 101.10323951, 91.83416937, 95.08657978, 109.42245514,\n", - " 119.38303158])" + "array([121.28205309, 109.85778602, 100.42445498, 90.50599999,\n", + " 92.05151236, 103.14713828, 112.45126145, 112.0794514 ,\n", + " 100.99302439, 92.05797052, 95.79899745, 111.26970242,\n", + " 120.25345264, 111.21895464, 102.08041362, 90.49647782,\n", + " 92.09080331, 102.83606718, 111.82360135, 111.02284088,\n", + " 100.78848247, 
92.05251503, 95.80633522, 109.22439593,\n", + " 119.32052683, 110.49639294, 100.95795714, 90.17458038,\n", + " 91.733426 , 102.93101197, 112.16047521, 111.63266057,\n", + " 101.04494196, 91.81236362, 95.06991362, 109.39603514,\n", + " 119.38930794])" ] }, - "execution_count": 42, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1010,10 +971,8 @@ }, { "cell_type": "code", - "execution_count": 43, - "metadata": { - "collapsed": true - }, + "execution_count": 23, + "metadata": {}, "outputs": [], "source": [ "future_forecast = pd.DataFrame(future_forecast,index = test.index,columns=['Prediction'])" @@ -1021,25 +980,25 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", - "\n", "
\n", " \n", @@ -1055,23 +1014,23 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", "
2015-01-01121.006984121.282053
2015-02-01110.058298109.857786
2015-03-01100.662141100.424455
2015-04-0190.67042890.506000
2015-05-0192.16164392.051512
\n", @@ -1080,14 +1039,14 @@ "text/plain": [ " Prediction\n", "DATE \n", - "2015-01-01 121.006984\n", - "2015-02-01 110.058298\n", - "2015-03-01 100.662141\n", - "2015-04-01 90.670428\n", - "2015-05-01 92.161643" + "2015-01-01 121.282053\n", + "2015-02-01 109.857786\n", + "2015-03-01 100.424455\n", + "2015-04-01 90.506000\n", + "2015-05-01 92.051512" ] }, - "execution_count": 44, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1098,25 +1057,25 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", - "\n", "\n", " \n", @@ -1164,7 +1123,7 @@ "2015-05-01 91.0930" ] }, - "execution_count": 45, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1175,38 +1134,26 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 26, "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" + "ename": "AttributeError", + "evalue": "'DataFrame' object has no attribute 'iplot'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtest\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfuture_forecast\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/miniconda3/envs/auto-arima/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 4374\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4375\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4376\u001b[0;31m 
\u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4377\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4378\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'iplot'" + ] } ], "source": [ "pd.concat([test,future_forecast],axis=1).iplot()" ] }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "future_forecast2 = future_forcast" - ] - }, { "cell_type": "code", "execution_count": 48, @@ -1227,7 +1174,7 @@ } ], "source": [ - "pd.concat([data,future_forecast2],axis=1).iplot()" + "pd.concat([data,future_forecast],axis=1).iplot()" ] }, { From dd8677802768fe9f2883b99069578762504d44d5 Mon Sep 17 00:00:00 2001 From: Ogaday Date: Wed, 31 Oct 2018 21:35:26 +0000 Subject: [PATCH 5/5] Fix: Prevent training data leakage The train set and test set were overlapping. This commit ensures that the test data starts where the train data finishes up. 
--- Forecasting a Time Series in Python.ipynb | 181 +++++++++++----------- 1 file changed, 87 insertions(+), 94 deletions(-) diff --git a/Forecasting a Time Series in Python.ipynb b/Forecasting a Time Series in Python.ipynb index d112ea8..2b0d733 100644 --- a/Forecasting a Time Series in Python.ipynb +++ b/Forecasting a Time Series in Python.ipynb @@ -468,7 +468,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -534,7 +534,7 @@ "1985-05-01 55.3151" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -545,7 +545,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -574,16 +574,16 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "train = data.loc['1985-01-01':'2016-12-01']" + "train = data.loc['1985-01-01':'2014-12-01']" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -616,24 +616,24 @@ " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", "
2016-08-01115.51592014-08-01108.1940
2016-09-01102.76372014-09-01100.4172
2016-10-0191.48672014-10-0192.3837
2016-11-0192.89002014-11-0199.7033
2016-12-01112.76942014-12-01109.3477
\n", @@ -642,14 +642,14 @@ "text/plain": [ " Energy Production\n", "DATE \n", - "2016-08-01 115.5159\n", - "2016-09-01 102.7637\n", - "2016-10-01 91.4867\n", - "2016-11-01 92.8900\n", - "2016-12-01 112.7694" + "2014-08-01 108.1940\n", + "2014-09-01 100.4172\n", + "2014-10-01 92.3837\n", + "2014-11-01 99.7033\n", + "2014-12-01 109.3477" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -660,7 +660,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -669,7 +669,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -735,7 +735,7 @@ "2015-05-01 91.0930" ] }, - "execution_count": 15, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -746,7 +746,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -812,7 +812,7 @@ "2018-01-01 129.4048" ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -823,7 +823,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -832,7 +832,7 @@ "37" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -843,7 +843,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -859,45 +859,38 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=1686.946, BIC=1706.527, Fit time=1.348 seconds\n", - "Fit ARIMA: order=(0, 1, 0) seasonal_order=(0, 1, 0, 12); AIC=1897.389, BIC=1905.221, Fit time=0.023 seconds\n", - "Fit ARIMA: order=(1, 1, 0) seasonal_order=(1, 1, 0, 12); 
AIC=1838.277, BIC=1853.942, Fit time=0.281 seconds\n", - "Fit ARIMA: order=(0, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=1745.226, BIC=1760.891, Fit time=0.243 seconds\n", - "Fit ARIMA: order=(1, 1, 1) seasonal_order=(1, 1, 1, 12); AIC=1687.289, BIC=1710.786, Fit time=1.478 seconds\n", - "Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 0, 12); AIC=1813.531, BIC=1829.196, Fit time=0.433 seconds\n", - "Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 2, 12); AIC=1686.349, BIC=1709.846, Fit time=3.457 seconds\n", - "Fit ARIMA: order=(0, 1, 1) seasonal_order=(0, 1, 2, 12); AIC=1744.461, BIC=1764.042, Fit time=1.203 seconds\n", - "Fit ARIMA: order=(2, 1, 1) seasonal_order=(0, 1, 2, 12); AIC=1686.471, BIC=1713.884, Fit time=4.320 seconds\n", - "Fit ARIMA: order=(1, 1, 0) seasonal_order=(0, 1, 2, 12); AIC=1758.825, BIC=1778.406, Fit time=0.884 seconds\n", - "Fit ARIMA: order=(1, 1, 2) seasonal_order=(0, 1, 2, 12); AIC=1685.864, BIC=1713.277, Fit time=4.383 seconds\n", - "Fit ARIMA: order=(2, 1, 3) seasonal_order=(0, 1, 2, 12); AIC=1689.714, BIC=1724.960, Fit time=6.895 seconds\n", - "Fit ARIMA: order=(1, 1, 2) seasonal_order=(1, 1, 2, 12); AIC=1683.989, BIC=1715.319, Fit time=3.750 seconds\n", - "Fit ARIMA: order=(1, 1, 2) seasonal_order=(1, 1, 1, 12); AIC=1686.996, BIC=1714.410, Fit time=2.271 seconds\n", - "Fit ARIMA: order=(1, 1, 2) seasonal_order=(0, 1, 1, 12); AIC=1686.893, BIC=1710.390, Fit time=1.553 seconds\n", - "Fit ARIMA: order=(0, 1, 2) seasonal_order=(1, 1, 2, 12); AIC=1695.321, BIC=1722.734, Fit time=3.538 seconds\n", - "Fit ARIMA: order=(2, 1, 2) seasonal_order=(1, 1, 2, 12); AIC=1687.095, BIC=1722.340, Fit time=6.486 seconds\n", - "Fit ARIMA: order=(1, 1, 1) seasonal_order=(1, 1, 2, 12); AIC=1684.654, BIC=1712.068, Fit time=8.034 seconds\n", - "Fit ARIMA: order=(1, 1, 3) seasonal_order=(1, 1, 2, 12); AIC=1685.565, BIC=1720.811, Fit time=7.571 seconds\n", - "Fit ARIMA: order=(0, 1, 1) seasonal_order=(1, 1, 2, 12); AIC=1743.820, BIC=1767.317, Fit time=1.797 
seconds\n", - "Fit ARIMA: order=(2, 1, 3) seasonal_order=(1, 1, 2, 12); AIC=1687.572, BIC=1726.734, Fit time=7.718 seconds\n", - "Fit ARIMA: order=(1, 1, 2) seasonal_order=(2, 1, 2, 12); AIC=1680.474, BIC=1715.720, Fit time=6.542 seconds\n", - "Fit ARIMA: order=(1, 1, 2) seasonal_order=(2, 1, 1, 12); AIC=1678.805, BIC=1710.134, Fit time=5.650 seconds\n", - "Fit ARIMA: order=(1, 1, 2) seasonal_order=(1, 1, 0, 12); AIC=1765.525, BIC=1789.022, Fit time=1.937 seconds\n", - "Fit ARIMA: order=(0, 1, 2) seasonal_order=(2, 1, 1, 12); AIC=1689.420, BIC=1716.833, Fit time=2.688 seconds\n", - "Fit ARIMA: order=(2, 1, 2) seasonal_order=(2, 1, 1, 12); AIC=1680.442, BIC=1715.687, Fit time=6.800 seconds\n", - "Fit ARIMA: order=(1, 1, 1) seasonal_order=(2, 1, 1, 12); AIC=1678.863, BIC=1706.276, Fit time=5.015 seconds\n", - "Fit ARIMA: order=(1, 1, 3) seasonal_order=(2, 1, 1, 12); AIC=1681.664, BIC=1716.910, Fit time=5.950 seconds\n", - "Fit ARIMA: order=(0, 1, 1) seasonal_order=(2, 1, 1, 12); AIC=1735.788, BIC=1759.285, Fit time=1.191 seconds\n", - "Fit ARIMA: order=(2, 1, 3) seasonal_order=(2, 1, 1, 12); AIC=1681.895, BIC=1721.057, Fit time=7.162 seconds\n", - "Fit ARIMA: order=(1, 1, 2) seasonal_order=(2, 1, 0, 12); AIC=1712.516, BIC=1739.930, Fit time=4.818 seconds\n", - "Total fit time: 115.432 seconds\n" + "Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=1562.673, BIC=1581.920, Fit time=0.810 seconds\n", + "Fit ARIMA: order=(0, 1, 0) seasonal_order=(0, 1, 0, 12); AIC=1760.323, BIC=1768.021, Fit time=0.023 seconds\n", + "Fit ARIMA: order=(1, 1, 0) seasonal_order=(1, 1, 0, 12); AIC=1700.037, BIC=1715.434, Fit time=0.234 seconds\n", + "Fit ARIMA: order=(0, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=1616.434, BIC=1631.831, Fit time=0.249 seconds\n", + "Fit ARIMA: order=(1, 1, 1) seasonal_order=(1, 1, 1, 12); AIC=1563.343, BIC=1586.439, Fit time=1.089 seconds\n", + "Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 0, 12); AIC=1678.972, BIC=1694.369, Fit time=0.468 
seconds\n", + "Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 2, 12); AIC=1562.675, BIC=1585.771, Fit time=3.676 seconds\n", + "Fit ARIMA: order=(1, 1, 1) seasonal_order=(1, 1, 2, 12); AIC=1561.966, BIC=1588.911, Fit time=3.803 seconds\n", + "Fit ARIMA: order=(0, 1, 1) seasonal_order=(1, 1, 2, 12); AIC=1616.436, BIC=1639.532, Fit time=1.695 seconds\n", + "Fit ARIMA: order=(2, 1, 1) seasonal_order=(1, 1, 2, 12); AIC=1562.349, BIC=1593.144, Fit time=4.780 seconds\n", + "Fit ARIMA: order=(1, 1, 0) seasonal_order=(1, 1, 2, 12); AIC=1629.405, BIC=1652.501, Fit time=1.257 seconds\n", + "Fit ARIMA: order=(1, 1, 2) seasonal_order=(1, 1, 2, 12); AIC=1562.110, BIC=1592.905, Fit time=4.955 seconds\n", + "Fit ARIMA: order=(0, 1, 0) seasonal_order=(1, 1, 2, 12); AIC=1639.110, BIC=1658.356, Fit time=1.278 seconds\n", + "Fit ARIMA: order=(2, 1, 2) seasonal_order=(1, 1, 2, 12); AIC=1564.868, BIC=1599.512, Fit time=7.150 seconds\n", + "Fit ARIMA: order=(1, 1, 1) seasonal_order=(2, 1, 2, 12); AIC=1559.223, BIC=1590.018, Fit time=4.891 seconds\n", + "Fit ARIMA: order=(1, 1, 1) seasonal_order=(2, 1, 1, 12); AIC=1557.471, BIC=1584.416, Fit time=6.139 seconds\n", + "Fit ARIMA: order=(1, 1, 1) seasonal_order=(1, 1, 0, 12); AIC=1629.749, BIC=1648.996, Fit time=1.138 seconds\n", + "Fit ARIMA: order=(0, 1, 1) seasonal_order=(2, 1, 1, 12); AIC=1610.175, BIC=1633.271, Fit time=1.148 seconds\n", + "Fit ARIMA: order=(2, 1, 1) seasonal_order=(2, 1, 1, 12); AIC=1558.262, BIC=1589.057, Fit time=5.611 seconds\n", + "Fit ARIMA: order=(1, 1, 0) seasonal_order=(2, 1, 1, 12); AIC=1623.655, BIC=1646.751, Fit time=1.121 seconds\n", + "Fit ARIMA: order=(1, 1, 2) seasonal_order=(2, 1, 1, 12); AIC=1558.117, BIC=1588.912, Fit time=6.771 seconds\n", + "Fit ARIMA: order=(0, 1, 0) seasonal_order=(2, 1, 1, 12); AIC=1634.432, BIC=1653.678, Fit time=0.974 seconds\n", + "Fit ARIMA: order=(2, 1, 2) seasonal_order=(2, 1, 1, 12); AIC=1560.934, BIC=1595.578, Fit time=7.581 seconds\n", + "Fit ARIMA: order=(1, 1, 1) 
seasonal_order=(2, 1, 0, 12); AIC=1584.211, BIC=1607.307, Fit time=4.474 seconds\n", + "Total fit time: 71.327 seconds\n" ] } ], @@ -913,16 +906,16 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "1678.8046078286538" + "1557.4705689357481" ] }, - "execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -933,7 +926,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -942,25 +935,25 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([121.28205309, 109.85778602, 100.42445498, 90.50599999,\n", - " 92.05151236, 103.14713828, 112.45126145, 112.0794514 ,\n", - " 100.99302439, 92.05797052, 95.79899745, 111.26970242,\n", - " 120.25345264, 111.21895464, 102.08041362, 90.49647782,\n", - " 92.09080331, 102.83606718, 111.82360135, 111.02284088,\n", - " 100.78848247, 92.05251503, 95.80633522, 109.22439593,\n", - " 119.32052683, 110.49639294, 100.95795714, 90.17458038,\n", - " 91.733426 , 102.93101197, 112.16047521, 111.63266057,\n", - " 101.04494196, 91.81236362, 95.06991362, 109.39603514,\n", - " 119.38930794])" + "array([118.03491147, 108.7540358 , 100.4580758 , 89.87684977,\n", + " 92.28568729, 102.61446451, 111.59283657, 110.72327607,\n", + " 99.7325228 , 91.87267577, 96.39353831, 109.49430632,\n", + " 117.42272981, 108.22115232, 100.57831178, 90.27458144,\n", + " 92.23678195, 102.51126709, 111.95153831, 110.762962 ,\n", + " 99.62040835, 91.85119294, 96.07617065, 110.35951153,\n", + " 118.70652065, 109.08683149, 101.45466743, 90.37675255,\n", + " 92.23242288, 102.38626699, 111.35758369, 110.26903977,\n", + " 99.748839 , 91.93560549, 96.7055109 , 110.27813967,\n", + " 118.75490027])" ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": 
"execute_result" } @@ -971,7 +964,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -980,7 +973,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1014,23 +1007,23 @@ " \n", " \n", " 2015-01-01\n", - " 121.282053\n", + " 118.034911\n", " \n", " \n", " 2015-02-01\n", - " 109.857786\n", + " 108.754036\n", " \n", " \n", " 2015-03-01\n", - " 100.424455\n", + " 100.458076\n", " \n", " \n", " 2015-04-01\n", - " 90.506000\n", + " 89.876850\n", " \n", " \n", " 2015-05-01\n", - " 92.051512\n", + " 92.285687\n", " \n", " \n", "\n", @@ -1039,14 +1032,14 @@ "text/plain": [ " Prediction\n", "DATE \n", - "2015-01-01 121.282053\n", - "2015-02-01 109.857786\n", - "2015-03-01 100.424455\n", - "2015-04-01 90.506000\n", - "2015-05-01 92.051512" + "2015-01-01 118.034911\n", + "2015-02-01 108.754036\n", + "2015-03-01 100.458076\n", + "2015-04-01 89.876850\n", + "2015-05-01 92.285687" ] }, - "execution_count": 24, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1057,7 +1050,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -1123,7 +1116,7 @@ "2015-05-01 91.0930" ] }, - "execution_count": 25, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" }