-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathbvar_rolling.py
174 lines (132 loc) · 5.88 KB
/
bvar_rolling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 30 16:18:03 2017
@author: rakshitanagalla
"""
import numpy as np
import pandas as pd
import datetime as dt
from dateutil.relativedelta import relativedelta
import calendar
def add_months(sourcedate,months):
month = sourcedate.month - 1 + months
year = int(sourcedate.year + month / 12 )
month = month % 12 + 1
day = min(sourcedate.day,calendar.monthrange(year,month)[1])
return dt.date(year,month,day)
data = pd.read_csv('/Users/rakshitanagalla/Desktop/Acad/PRML/project/Recent_toBeUsed.csv')
data.drop(['e','YearMonth','Unnamed: 4', 'y', 'y_US','m_in_billion_rupees', 'm_US_in billion rupees','m_in_billion_rupees.1','i', 'i_US','m_US_in billion', 'Dummy', 'Dummy2'],inplace=True,axis=1)
DTTMFormat = '%d-%m-%Y'
data['DTTM'] = pd.to_datetime(data['DTTM'],format=DTTMFormat)
data.set_index('DTTM', inplace = 'True')
data_diff = data.diff().dropna() #differencing
ForecastTime = dt.datetime.strptime('1-7-2006', DTTMFormat)
lag = 13
constant =1
M = 4 #Number of endogenous variables
outputPredDF = pd.DataFrame()
se = pd.DataFrame()
cnt = 0
while cnt < 110:
train_period = [ForecastTime - relativedelta(years=10), add_months(ForecastTime,-1)]
if cnt == 0:
train = data_diff[add_months(train_period[0],1):train_period[1]]
else:
train = data_diff[train_period[0]:train_period[1]]
test = data[ForecastTime:add_months(ForecastTime+relativedelta(years=1),-1)]
#X1 = np.matrix(pd.concat([train, train.shift(1)], axis=1).dropna()[:-1])
Xdum = train
for l in range(lag-1):
l+=1
Xdum = pd.concat([Xdum, train.shift(l)], axis=1)
X1 = np.matrix(Xdum.dropna()[:-1])
#X1 = np.matrix(train.shift(1).dropna())
X = np.c_[np.ones(len(X1)), X1 ]
Y = np.matrix(train[lag:])
# OLS coefficients
A_OLS = np.dot(np.linalg.inv(np.dot(X.T,X)),np.dot(X.T,Y)) # This is the matrix of regression coefficients
a_OLS = A_OLS.T.ravel().T # This is the vector of coefficients, i.e. it holds
# that a_OLS = vec(A_OLS)
SSE = np.dot( (Y - np.dot(X,A_OLS)).T , (Y - np.dot(X,A_OLS)) )
K,M = A_OLS.shape
T = X.shape[0]
SIGMA_OLS = SSE/(T-K)
'''
Prior Hyperparameters for Minesotta prior
'''
A_prior = np.zeros((K,M))
a_prior = np.matrix(A_prior.ravel()).T
# Hyperparameters on the Minnesota variance of alpha
a_bar_1 = 0.2;
a_bar_2 = 0.14;
a_bar_3 = 10**2;
# Now get residual variances of univariate p-lag autoregressions. Here
# we just run the AR(p) model on each equation,(if they have been specified for the original
# VAR model)
sigma_sq = np.zeros((M,1)); # vector to store residual variances
for i in range(M):
# Create lags of dependent variable in i-th equation
#Ylag_i = X[:,[i+1,i+1+M]]
Ylag_i = X[:,np.arange(constant+i,K,M)]
# Dependent variable in i-th equation
Y_i = Y[:,i]
# OLS estimates of i-th equation
alpha_i = np.dot(np.linalg.inv(np.dot(Ylag_i.T,Ylag_i)),np.dot(Ylag_i.T,Y_i))
sigma_sq[i,0] = np.dot((Y_i - np.dot(Ylag_i,alpha_i)).T,(Y_i - np.dot(Ylag_i,alpha_i)))/(T-lag+1)
# Now define prior hyperparameters.
# Create an array of dimensions K x M, which will contain the K diagonal
# elements of the covariance matrix, in each of the M equations.
V_i = np.zeros((K,M));
# index in each equation which are the own lags
ind = np.zeros((M,lag));
for i in range(M):
ind[i,:] = np.arange(constant+i,K,M)
for i in range(M): # for each i-th equation
for j in range(K): # for each j-th RHS variable
if j==0:
V_i[j,i] = a_bar_3*sigma_sq[i,0] # variance on constant
elif j in ind[i,:]:
V_i[j,i] = a_bar_1/(np.ceil((j)/M)**2); # variance on own lags
else:
for kj in range(M):
if j in ind[kj,:]:
ll = kj
V_i[j,i] = (a_bar_2*sigma_sq[i,0])/((np.ceil((j)/M)**2)*sigma_sq[ll,0]);
# Now V is a diagonal matrix with diagonal elements the V_i
V_prior = np.diag(V_i.ravel()); # this is the prior variance of the vector a
# SIGMA is equal to the OLS quantity
SIGMA = SIGMA_OLS;
'''
POSTERIORS
'''
#--------- Posterior hyperparameters of ALPHA and SIGMA with Minnesota Prior
V_post = np.linalg.inv(np.linalg.inv(V_prior) + np.kron(np.linalg.inv(SIGMA),np.dot(X.T,X)) );
a_post = np.dot(V_post, np.dot(np.linalg.inv(V_prior),a_prior) + np.dot(np.kron(np.linalg.inv(SIGMA),np.dot(X.T,X)),a_OLS) )
A_post = np.reshape(a_post, (K, M),order='F')
'''
PREDICTIVE INFERENCE
'''
forecasts = np.zeros(12)
test_new = train[-lag:]
for step in range(12):
#X2 = np.matrix(pd.concat([test_new[-lag:], test_new[-lag:].shift(1)], axis=1).dropna())
Xdum2 = test_new[-lag:]
for l in range(lag-1):
l+=1
Xdum2 = pd.concat([Xdum2, test_new[-lag:].shift(l)], axis=1)
X2 = np.matrix(Xdum2.dropna())
#X2 = np.matrix(test_new[-lag:])
X_test = np.c_[np.ones(len(X2)), X2]
forecasts[step] = np.dot(X_test,A_post)[0,0]
test_new.loc[step+lag] = np.array(np.dot(X_test,A_post))[0]
#final_pred = np.exp(forecasts)
final_pred = np.exp(np.r_[data['ln_e'][train_period[1]], forecasts].cumsum())[1:]
#save predictions and errors
columName = str(ForecastTime.year)+'M'+str(ForecastTime.month)
outputPredDF[columName+'_pred'] = final_pred
se[columName+'_err'] = np.array((np.exp(test.ln_e) - final_pred)**2)
cnt +=1
ForecastTime = add_months(ForecastTime,1)
print(train_period)
rmse = np.sqrt(se.mean(axis = 1)) #T_0 = 110