-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathBayesianMSVAR.tex
executable file
·544 lines (376 loc) · 33.8 KB
/
BayesianMSVAR.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
\documentclass[final,3p,authoryear]{elsarticle}
%\usepackage{float}
\usepackage[none]{hyphenat}
\usepackage{amsmath,amsfonts,amssymb,amsthm,pxfonts,dsfont}
\usepackage[ansinew]{inputenc}
\usepackage{setspace}
\usepackage{natbib}
\usepackage[bookmarks, colorlinks, pdfauthor={Matthieu Droumaguet, Anders Warne and Tomasz Wozniak}]{hyperref}
%\hypersetup{linkcolor=blue,citecolor=blue,filecolor=black,urlcolor=blue}
\usepackage{graphicx}
\usepackage{color}
\usepackage{geometry}
%\usepackage{multirow}
%\usepackage{booktabs}
%\usepackage{longtable,lscape,ltcaption}
%\usepackage{rotating}
%\usepackage{soul}
%
%\setstretch{2}
%\geometry{margin=2cm}
%___________________________________________________________________________________________________
%
% Document begins
%___________________________________________________________________________________________________
\begin{document}
\biboptions{longnamesfirst,semicolon}
%\theoremstyle{remark}
\newtheorem{thm}{Theorem}
\newtheorem{cl}{Corollary}
\newtheorem{prop}{Proposition}
\newtheorem{lm}{Lemma}
\newdefinition{df}{Definition}
\newdefinition{ex}{Example}
\newdefinition{as}{Assumption}
\newdefinition{pr}{Property}
\newdefinition{rs}{Restriction}
\newdefinition{al}{Algorithm}
\begin{frontmatter}
\title{Bayesian Estimation and Inference for Markov-Switching VARs in R}
\author[eui]{Matthieu Droumaguet}
\author[ecb]{Anders Warne}
\author[um]{Tomasz Wo\'zniak}%\corref{cor1}}
%\cortext[cor1]{, website: \url{http://bit.ly/tomaszwozniak}\\ \linebreak
%\vspace{0.3cm}}
\address[eui]{Department of Economics, European University Institute}
\address[ecb]{Directorate General Research, European Central Bank}
\address[um]{Department of Economics, University of Melbourne\\[2ex]
email: \href{mailto:[email protected]}{[email protected]}\\[4ex]
\textcopyright{} 2016 Matthieu Droumaguet, Anders Warne, \& Tomasz Wo\'zniak
}
\begin{abstract}
A block Metropolis-Hastings algorithm for the Bayesian estimation of the Markov-switching Vector Autoregressive models with restrictions for Granger noncausality is provided, as well as an appropriate estimator for the marginal data density. The codes are available under the GNU General Public License v3.0. To refer to the codes in publications, please, cite the following paper:
\bigskip\noindent Droumaguet, M., Warne, A., Wo\'zniak, T. (2017) Granger Causality and Regime Inference in Markov-Switching VAR Models with Bayesian Methods, \emph{Journal of Applied Econometrics}, 32(4), pp. 802--818.
\end{abstract}
\begin{keyword}
R \sep Markov-switching VARs \sep Block Metropolis-Hastings Sampler \sep Marginal Data Density
\end{keyword}
\end{frontmatter}
\section{The Markov-switching VAR Model}\label{sec:Model}
\noindent
The model under consideration is the Markov-switching Vector Autoregressive process with the restrictions for Granger noncausality. The model presented in this manuscript was used in \cite{Droumaguet2016} to make inference about Granger causality between money and income data for the U.S. The exposition in this note is based on \cite{Droumaguet2016online}.
\paragraph{Notation and model} Let $\mathbf{y}_T = (y_1,\dots,y_T)^{'}$ denote a time series of $T$ observations, where each $y_{t}$ is a $N$-variate real-valued vector for $t \in \{1,\dots,T\}$. We consider a class of parametric Markov mixture distribution models in which the stochastic process $y_t$ depends on the realizations of a hidden discrete stochastic process $s_t$ with finite state space $\{ 1,\dots,M \}$ \citep[see][]{Hamilton:1989zl}. Conditioned on the state, $s_t$, and time series up to time $t-1$, denoted by $\mathbf{y}_{t-1}$, $y_t$ follows an independent identical normal distribution. Specifically, we let:
\begin{align}
y_t &= \mu_{s_t} + A_{s_t}^{(1)} y_{t-1} + \dots + A_{s_t}^{(p)} y_{t-p} + \epsilon_t, \label{eq:ConditionalMean}\\
\epsilon_t | s_t &\sim i.i.\mathcal{N}(\mathbf{0}, \Sigma_{s_t}), \label{eq:epsilon1}
\end{align}
for $t\in\{1,\dots,T\}$. We set the vector of initial values $\mathbf{y}_0 = (y_{p-1}, \dots, y_0)^{\prime}$ to the first $p$ observations of the available data.
The variable $s_t$ is assumed to be an irreducible aperiodic Markov chain with $\Pr(S_0 = i|\mathbf{P}) = \mathbf{\pi}_i$, where $\mathbf{\pi} = (\pi_1,\dots,\pi_M)$ is the ergodic distribution of the Markov Switching (MS) process. Its properties are sufficiently described by the $(M\times M)$ transition probabilities matrix:
\begin{equation}\label{eq:Pmatrix}
\mathbf{P} = \begin{bmatrix} p_{11} & p_{12} & \dots & p_{1M} \\ p_{21} & p_{22} & \dots & p_{2M} \\ \vdots & \vdots & \ddots & \vdots \\ p_{M1} & p_{M2} & \dots & p_{MM} \end{bmatrix},
\end{equation}
in which an element, $p_{ij}$, denotes the probability of transition from state $i$ to state $j$, $ p_{ij} = \Pr(s_{t+1} = j| s_{t} = i) $. The elements of each row of matrix $\mathbf{P}$ sum to one, $ \sum_{j=1}^{M} p_{ij} = 1 $. Equations (\ref{eq:ConditionalMean})--(\ref{eq:Pmatrix}) represent a MS-VAR model with $M$ states and $p$ lags.
\paragraph{Likelihood function} Let $\theta$ be a vector of size $k$, collecting parameters of the transition probabilities matrix $\mathbf{P}$ and all the state-dependent parameters of the VAR process, $\theta_{s_t}$: $\mu_{s_t}$, $A_{s_t}^{(i)}$, $\Sigma_{s_t}$, for $s_t \in\{ 1,\dots,M\}$ and $i \in\{ 1,\dots,p\}$. The complete-data likelihood function is equal to the joint sampling distribution $p(\mathbf{S}_T ,\mathbf{y}_T |\theta)$ for the complete data $(\mathbf{S}_T ,\mathbf{y}_T )$ given the parameters, where $\mathbf{S}_T = (s_0, s_1, \dots, s_T)$; see, e.g., \cite{Fruhwirth-Schnatter2006}. This likelihood is further decomposed into:
\begin{equation}\label{eq:CompleteData}
p\bigl( \mathbf{S}_T , \mathbf{y}_T \big|\theta \bigr) = p\bigl( \mathbf{y}_T \big| \mathbf{S}_T ,\theta \bigr) \Pr\bigl( \mathbf{S}_T \big| \mathbf{P} \bigr),
\end{equation}
where the parameters $\theta = (\theta_1 , \ldots , \theta_M , \mathbf{P})$. The two components on the right hand side of equation (\ref{eq:CompleteData}) are the same as the standard MS-VAR model of \citeauthor{Fruhwirth-Schnatter2006}~(\citeyear{Fruhwirth-Schnatter2006}, Section 11.3.1).
\paragraph{Prior distribution}
We assume that the prior distribution of the state-specific parameters for each state and the transition probabilities matrix are independent:
\begin{equation}\label{eq:prior1}
p\bigl(\theta \bigr) = \prod_{i=1}^{M} p\bigl(\theta_i\bigr)p\bigl(\mathbf{P}\bigr).
\end{equation}
For the unrestricted MS-VAR model, we assume the following prior specification. Each row of the transition probabilities matrix, $\mathbf{P}$, \emph{a priori} follows an $M$-variate Dirichlet distribution, with the $M\times1$ parameter vectors $e_m$, for $m\in\{1,\dots,M\}$. Finally, collect all of the vectors of parameters into a $M^2\times1$ vector $e=(e_1^{'},\dots,e_M^{'})^{'}$.
Furthermore, the state-dependent parameters of the VAR process are collected in vectors
$$\beta_{s_t} = \biggl( \mu_{s_t}^{\prime}, \text{vec}\bigl(A_{s_t}^{(1)}\bigr)^{\prime},\dots,\text{vec}\bigl(A_{s_t}^{(p)}\bigr)^{\prime} \biggr)^{\prime},$$
for $s_t = 1,\dots,M$. For each $s_t\in\{1,\dots,M\}$ $\beta_{s_t}$ follows \emph{a priori} a $(N+pN^2)$-variate normal distribution, with mean equal to $\mu_\beta$ and a covariance matrix $V_\beta$. Furthermore, let $B_{s_t}=\left(\mu_{s_t}, A_{s_t}^{(1)},\dots,A_{s_t}^{(p)}\right)^{'}$ be a $(1+pN)\times N$ matrix of state-specific autoregressive parameters.
Let $\sigma_{s_t}$ be an two-dimensional vector of standard deviations and $\mathbf{R}_{s_t}$ a $2 \times 2$ correlation matrix. The covariance matrix of $\epsilon_t|s_t$ can now be decomposed as:
\begin{equation}\label{eq:SigmaDecomposed}
\Sigma_{s_t} = \text{diag}\bigl(\sigma_{s_t}\bigr)\mathbf{R}_{s_t}\text{diag}\bigl(\sigma_{s_t}\bigr),
\end{equation}
Modeling covariance matrices using a decomposition into standard deviations and a correlation matrix, as in equation (\ref{eq:SigmaDecomposed}), was proposed for Bayesian inference by \cite{Barnard2000}. Each standard deviation $\sigma_{j.s_t}$ for $s_t = 1,\dots,M$ and $j=1,2$, follows a log-normal distribution, with the location parameter equal to $\mu_\sigma$ and the standard deviation parameter set to $\lambda_\sigma >0$.
Finally, we assume that the marginal prior distribution for the below-diagonal element of the (symmetric) correlation matrix $\mathbf{R}_{s_t}$ is a uniform distribution on the interval $(-1,1)$. We collect all the standard deviations in one vector, $\sigma = (\sigma_1^{\prime},\dots,\sigma_M^{\prime})^{\prime}$, and all the unknown correlation coefficients into a vector
$$\mathbf{R} = (\text{vecl}(\mathbf{R}_1)^{\prime},\dots,\text{vecl}(\mathbf{R}_M)^{\prime})^{\prime},$$
where the $\text{vecl}$ operator stacks all the lower-diagonal elements of the correlation matrix into a vector.
To summarize, the prior specification (\ref{eq:prior1}) now takes the detailed form of:
\begin{equation}\label{eq:prior2}
p(\theta) = \prod_{i=1}^{M} p(\mathbf{P}_i)p(\beta_i)p(\mathbf{R}_i)\left( \prod_{j=1}^{N} p(\sigma_{i.j}) \right),
\end{equation}
where each of the prior distributions is as assumed:
\begin{align*}
\mathbf{P}_{i\cdot} &\sim \mathcal{D}_{M}(e_i) \\
\beta &\sim \mathcal{N}(\mu_\beta,V_\beta) \\
\sigma_{i.j} &\sim log\mathcal{N}(\mu_\sigma,\lambda_\sigma) \\
\mathbf{R}_{i.jk} &\sim \mathcal{U}(-1,1)
\end{align*}
for $i=1,\dots,M$ and $j,k=1,\dots,N$, and $j\neq k$.
\section{Block Metropolis-Hastings algorithm}\label{app:BMHA}
\noindent
This section describes all the constituting blocks that form the MCMC sampler.
\subsection{Simulating Hidden Markov Process}\label{ssec:states}
\noindent
The first drawn parameter is the vector representing the states of the economy, $\mathbf{S}_T$. We first use a filter and smoother \citep[see Section 11.2 of][ and references therein]{Fruhwirth-Schnatter2006} and obtain the probabilities $\Pr(s_t = i|\mathbf{y}_T , \theta^{(l-1)})$, for $t\in\{1,\dots,T\}$ and $i\in\{1,\dots,M\}$, and then draw $\mathbf{S}_T^{(l)}$, for $l^{\text{th}}$ iteration of the algorithm. For the full description of the algorithm used in this work the reader is referred to \cite{Droumaguet2012}.
\subsection{Sampling Transition Probabilities}
\noindent
In this step of the MCMC sampler, we draw from the posterior distribution of the transition probabilities matrix, conditioning on the states drawn in the previous step of the current iteration, $\mathbf{P}^{(l)} \sim p(\mathbf{P}|\mathbf{S}_T^{(l)})$. \cite{Sims2008} provide a flexible analytical framework for working with restricted transition probabilities, and the reader is invited to consult Section~3 of their paper for an exhaustive description of the possibilities provided by the framework. We however limit the latitude given by the reparametrization in order to ensure the stationarity of Markov chain $\mathbf{S}_T$.
\paragraph{Reparametrization} The transition probabilities matrix $\mathbf{P}$ is modeled with $Q$ vectors $w_j$, $j=1,\cdots,Q$ and each of size $d_j \leq M$. Let all the elements of $w_j$ belong to the $(0,1)$ interval and sum up to one, and stack all of them into the column vector $\mathbf{w}=(w_1^{'}, \dots, w_Q^{'})^{'}$ of dimension $d=\sum_{j=1}^{Q}d_j$. Writing $\mathbf{p}=vec(\mathbf{P}^{'})$ as a $M^2$ dimensional column vector, and introducing the $(M^2 \times d)$ matrix $\mathbf{M}$, the transition matrix is decomposed as:
\begin{equation}\label{eq:RestrictedP}
\mathbf{p} = \mathbf{M} \mathbf{w},
\end{equation}
where the $\mathbf{M}$ matrix is composed of the $M_{ij}$ sub-matrices of dimension $(M \times d_j)$, where $i\in\{1,\dots,M\}$, and $j\in\{1,\dots,Q\}$:
\begin{equation}
\mathbf{M} = \begin{bmatrix} M_{11} & \dots & M_{1Q} \\ \vdots & \ddots & \\ M_{M1} & & M_{MQ} \end{bmatrix},
\nonumber
\end{equation}
where each $M_{ij}$ satisfies the following conditions:
\begin{enumerate}
\item For each $(i,j)$, all elements of $M_{ij}$ are non-negative.
\item $ \imath_{M}^{'} M_{ij} = \Lambda_{ij} \imath_{d_j}^{'}$, where $\Lambda_{ij}$ is the sum of the elements in any column of $M_{ij}$.
\item Each row of $\mathbf{M}$ has, at most, one non-zero element.
\item $M$ is such that $\mathbf{P}$ is irreducible: for all $j, d_j \ge 2 $.
\end{enumerate}
The first three conditions are inherited from \cite{Sims2008}, whereas the last condition assures that $\mathbf{P}$ is irreducible, forbidding the presence of an absorbing state that would render the Markov chain $\mathbf{S}_T$ non-stationary. The lack of independence of the rows of $\mathbf{P}$ is described in \citet[][Section 11.5.5]{Fruhwirth-Schnatter2006}. Once the initial state $s_0$ is drawn from the ergodic distribution $\pi$ of $\mathbf{P}$, direct MCMC sampling from the conditional posterior distribution becomes impossible. However, a Metropolis-Hastings step can be set up to circumvent this issue, since a kernel of joint posterior density of all rows is known: $p(\mathbf{P}|\mathbf{S}_T ) \propto \pi\prod_{j=1}^Q \mathcal{D}_{d_j}(w_j)$. Hence, the proposal for transition probabilities is obtained by sampling each $w_j$ from the convenient Dirichlet distribution. We then transform the column vector $\mathbf{w}$ into our candidate matrix of transitions probabilities using equation (\ref{eq:RestrictedP}). Finally, we compute the acceptance rate before retaining or discarding the draw.
\begin{al}\label{al:MH-P} \textit{Metropolis-Hastings step for the restricted transition matrix.}
\begin{enumerate}
\item $s_0 \sim \pi$. The initial state is drawn from the ergodic distribution of $\mathbf{P}$.
\item $w_j \sim \mathcal{D}_{d_j} (n_{1,j} + e_{1,j},\dots, n_{d_j,j} + e_{d_j,j} )$ for $j\in\{1,\dots, Q\}$. $n_{i,j}$ corresponds to the number of transitions from state $i$ to state $j$, counted from $\mathbf{S}_T$. The candidate transition probabilities matrix -- in the transformed notation -- are sampled from a Dirichlet distribution.
\item $\mathbf{P}^{new} = \mathbf{M} \mathbf{w}$. The proposal for the transitions probabilities matrix is reconstructed.
\item Accept $\mathbf{P}^{new}$ if $u \le (\pi^{new}/\pi^{l-1})$, where $u \sim \mathcal{U}[0,1]$. $\pi^{new}$ and $\pi^{l-1}$ are the ergodic probabilities of $s_0^{(l)}$ that are computed from $\mathbf{P}^{new}$ and $\mathbf{P}^{l-1}$ respectively.
\end{enumerate}
\end{al}
\subsection{Sampling Correlation Coefficients and Standard Deviations}
\noindent
Adapting the approach proposed by \cite{Barnard2000} to Markov-switching models, we sample from the full conditional distribution of unrestricted and restricted covariance matrices.
\begin{al}\label{al:Griddy-stdev} \textit{Griddy-Gibbs for the standard deviations.} The algorithm iterates on all the standard deviation parameters $\sigma_{s_t.j}$ for $i\in\{1,\dots,N\}$ and $s_t\in\{1,\dots,M\}$. The grid is centered on the residuals' sample standard deviation $\hat{\sigma}_{s_t.i}$ and divides the interval $(\hat{\sigma}_{s_t.i} - 3 \hat{\sigma}_{\hat{\sigma}_{s_t.i}} , \hat{\sigma}_{s_t.i} + 3 \hat{\sigma}_{\hat{\sigma}_{s_t.i}} )$ into $G$ grid points. $\hat{\sigma}_{\hat{\sigma}_{s_t.i}}$ is an estimator of the standard error of the estimator of the sample standard deviation.
\begin{enumerate}
\item Regime-invariant standard deviations: Draw from the unknown univariate density
\begin{equation*}
p \bigl(\sigma_{i} \big| \mathbf{y}_T , \mathbf{S}_T , \mathbf{P}, \beta, \sigma_{-i},\mathbf{R} \bigr).
\end{equation*}
This is done by evaluating a kernel on a grid of points, using the proportionality relation, with the likelihood function times the prior: $\sigma_{i}| \mathbf{y}_T , \mathbf{S}_T , \mathbf{P}, \beta, \sigma_{-i},\mathbf{R} \propto p( \mathbf{y}_T | \mathbf{S}_T , \theta) \cdot p(\sigma_{i})$. Reconstruct the c.d.f. from the grid through deterministic integration and sample from it.
\item Regime-varying standard deviations: For all regimes $s_t\in\{1,\dots,M\}$, draw from the univariate density
\begin{equation*}
p \bigl( \sigma_{s_t.i} \big| \mathbf{y}_T , \mathbf{S}_T , \mathbf{P}, \beta, \sigma_{s_t.-i},\mathbf{R} \bigr),
\end{equation*}
evaluating a kernel thanks to the proportionality relation, with the likelihood function times the prior: $\sigma_{s_t.i} | \mathbf{y}_T , \mathbf{S}_T , \mathbf{P}, \beta, \sigma_{s_t.-i},\mathbf{R} \propto p( \mathbf{y}_T | \mathbf{S}_T , \theta ) \cdot p(\sigma_{i.s_t})$.
\end{enumerate}
\end{al}
\begin{al}\label{al:Griddy-corr} \textit{Griddy-Gibbs for the correlations} The algorithm iterates on all the correlation parameters $\mathbf{R}_{s_t.i}$ for $i\in\{1,\dots,(N-1)N/2\}$ and $s_t\in\{1,\dots,M\}$. The marginal posterior density of each correlation coefficient is bounded by $a$ from below and $b$ from above. $a$ and $b$ are computed so that although correlation coefficients are sampled from the marginal posterior distributions the resulting correlation matrix is positive definite See \cite{Barnard2000} for the algorithm determining $a$ and $b$ for each $\mathbf{R}_{s_t.i}$. The grid divides interval $(a,b)$ into $G$ grid points.
\begin{enumerate}
\item Depending on the restriction scheme, set correlation parameters to 0.
\item Regime-invariant correlations: Draw from the univariate density
\begin{equation*}
p \bigl( \mathbf{R}_{i} \big| \mathbf{y}_T , \mathbf{S}_T , \mathbf{P}, \beta, \sigma,\mathbf{R}_{-i} \bigr),
\end{equation*}
evaluating a kernel thanks to the proportionality relation, with the likelihood function times the prior: $\mathbf{R}_{i}| \mathbf{y}_T , \mathbf{S}_T , \mathbf{P}, \beta, \sigma,\mathbf{R}_{-i} \propto p( \mathbf{y}_T | \mathbf{S}_T , \theta) \cdot p(\mathbf{R}_{i})$.
\item Regime-varying correlations: For all regimes $s_t\in\{1,\dots,M\}$, draw from the univariate density
\begin{equation*}
p \bigl( \mathbf{R}_{s_t.i} \big| \mathbf{y}_T , \mathbf{S}_T , \mathbf{P}, \beta, \sigma,\mathbf{R}_{s_t.-i} \bigr),
\end{equation*}
evaluating a kernel thanks to the proportionality relation, with the likelihood function times the prior: $\mathbf{R}_{s_t.-i} | \mathbf{y}_T , \mathbf{S}_T , \mathbf{P}, \beta, \sigma,\mathbf{R}_{s_t.-i} \propto p( \mathbf{y}_T | \mathbf{S}_T , \theta ) \cdot p(\mathbf{R}_{s_t.i})$.
\end{enumerate}
\end{al}
In their empirical application \cite{Droumaguet2016} reported the results for fifty grid points, $G=50$, for each of the standard deviation and the correlation coefficients, however, the results were practically the same also for $G=30$.
\subsection{Sampling Vector Autoregressive Parameters}\label{ssec:var}
\noindent
Finally, we draw the state-dependent autoregressive parameters, $\beta_{s_t}$ for $s_t\in\{ 1,\dots,M\}$. The Bayesian parameter estimation of finite mixtures of regression models when the realizations of states is known has been precisely covered in \citet[Section 8.4.3]{Fruhwirth-Schnatter2006}. The procedure consists of estimating all the regression coefficients simultaneously by stacking them into $\beta = ( \beta_0, \beta_1, \dots, \beta_M) $, where $\beta_0$ is a common regression parameter for each regime, and hence is useful for the imposing of restrictions of state invariance for the autoregressive parameters. The regression model becomes:
\begin{equation}
y_t = Z_t \beta_0 + Z_t D_{i.1} \beta_1 + \dots + Z_t D_{i.M} \beta_M + \epsilon_t ,
\end{equation}
\begin{equation}
\epsilon_t \sim i.i.\mathcal{N}(\mathbf{0}, \Sigma_{s_t}) .
\end{equation}
We have here introduced the $D_{i.s_t}$, which are dummy variables taking the value 1 when the regime occurs and set to 0 otherwise. A transformation of the regressors $Z_T$ also has to be performed in order to allow for different coefficients on the dependent variables, for instance to impose zero restrictions on parameters. In the context of VARs, \citet[Section 2.2.3]{koop2010bayesian} detail a convenient notation that stacks all the regression coefficients on a diagonal matrix for every equation. We adapt this notation by stacking all the regression coefficients for all the states on diagonal matrix. If $z_{n.s_t.t}$ corresponds to the row vector of $1+Np$ independent variables for equation $n$, state $s_t$ (starting at 0 for regime-invariant parameters), and at time $t$, the stacked regressor $Z_t$ will be of the following form:
\begin{equation}
\setcounter{MaxMatrixCols}{11}
Z_t = \text{diag}\bigl( z_{1.0.t}, \dots , z_{N.0.t}, z_{1.1.t}, \dots , z_{N.1.t},\dots , z_{1.M.t}, \dots , z_{N.M.t} \bigr) .
\nonumber
\end{equation}
This notation enables the restriction of each parameter, by simply setting $z_{n.s_t.t}$ to 0 where desired.
\begin{al}\label{al:beta} \textit{Sampling the autoregressive parameters.} We assume normal prior for $\beta$, i.e. $\beta \sim \mathcal{N}(\mathbf{0}, V_{\beta}) $ .
\begin{enumerate}
\item For all $Z_t$s, impose restrictions by setting $z_{n,s_t,t}$ to zero accordingly.
\item $\beta | \mathbf{y}_T , \mathbf{S}_T , \mathbf{P}, \sigma,\mathbf{R} \sim \mathcal{N}(\overline{\beta}, \overline{V_{\beta}})$. Sample $\beta$ from the conditional normal posterior distribution, with the following parameters:
\begin{equation}
\overline{V_{\beta}} = \left( V^{-1}_{\beta} + \sum_{t=1}^{T} Z^{'}_t \Sigma_{s_t}^{-1} Z_t \right)^{-1}
\nonumber
\end{equation} and
\begin{equation}
\overline{\beta} = \overline{V_{\beta}} \left( \sum_{t=1}^{T} Z^{'}_t \Sigma_{s_t}^{-1} y_t\right).
\nonumber
\end{equation}
\end{enumerate}
\end{al}
\section{Estimation of marginal data densities}
\noindent An appropriate marginal data density estimator used for the MS-VAR model with restrictions described above is the Modified Harmonic Mean estimator proposed by \cite{Geweke1999,Geweke2005}. See \cite{Droumaguet2012} for some more details regarding the computational details for the estimator for the MS-VAR models.
% BIBLIOGRAPHY
\bibliographystyle{model5-names}
\bibliography{bib-causality}
\newpage
%\section*{R functions}\label{sec:code}
%
%\noindent File \texttt{BayesianECCCGARCH.R} includes, amongst others, two utility functions that are described below.
\small
\bigskip
\begin{center}
\rule{15cm}{.1pt}\\
\bigskip\begin{tabular}{p{3.5cm} l}
\texttt{G.MSBVAR.griddy} & \textit{Bayesian estimation of restricted MS-VAR models}
\end{tabular}
\smallskip
\rule{15cm}{.1pt}
\end{center}
\bigskip\noindent\textbf{Description}
\begin{quote}
A block Metropolis-Hasting sampler for MS($M$)-VAR($p$) models.
\end{quote}
\bigskip\noindent\textbf{Usage}
\begin{quote}
\begin{verbatim}
G.MSBVAR.griddy(N, h, priors, restrictions, starting.values,
debug=FALSE, print.iterations=50)
\end{verbatim}
\end{quote}
\bigskip\noindent\textbf{Arguments}
\begin{quote}
\begin{tabular}{p{3cm}p{10cm}}
\texttt{N} & A positive integer specifying the number of iterations of the sampling algorithm. \\
&\\
\texttt{h}& A positive integer specifying the forecast horizon.\\
&\\
\texttt{priors}& A list of objects specifying the prior distributions. It contains the following objects:
\texttt{Beta.bar} -- a $N+pN^2\times 1$ matrix of the normal prior mean $\mu_\beta$;
\texttt{V.Beta.bar.m1} -- a $N+pN^2\times N+pN^2$ covariance matrix of the normal prior $V_\beta$;
\texttt{S.mean} -- a scalar specifying the location parameter of the log-normal prior $\mu_\sigma$;
\texttt{S.sd} -- a scalar specifying the standard deviation parameter of the log-normal prior $\lambda_\sigma$;
\texttt{w} -- a $M^2\times 1$ matrix of the parameters of the Dirichlet distribution $e$.
\\
&\\
\texttt{restrictions}& A list of objects specifying the restrictions on the parameters. It contains the following objects: \texttt{Beta.0}, \texttt{Beta.St}, \texttt{Sigma.0}, \texttt{Sigma.St}, \texttt{M}, \texttt{dj} that are specified below.
\linebreak \texttt{Beta.0} -- a $(1+pN)\times N$ matrix specifying which regime-invariant autoregressive parameters of matrix $B_{s_t}$ should be estimated, where value 1 is assigned to parameters to be estimated and value 0 to parameters not to be estimated.
\texttt{Beta.St} -- a list of $M$ $(1+pN)\times N$ matrices specifying which regime-dependent autoregressive parameters of matrix $B_{s_t}$ should be estimated, where value 1 is assigned to parameters to be estimated and value 0 to parameters not to be estimated.
\texttt{Sigma.0} -- a $N\times N$ matrix specifying which regime-invariant elements of the covariance matrix $\Sigma_{s_t}$ should be estimated, where value 1 is assigned to parameters to be estimated and value 0 to parameters not to be estimated.
\texttt{Sigma.St} -- a list of $M$$N\times N$ matrices specifying which regime-dependent elements of the covariance matrices $\Sigma_{s_t}$ should be estimated, where value 1 is assigned to parameters to be estimated and value 0 to parameters not to be estimated.
\texttt{M} -- a $M^2\times d$ matrix $\mathbf{M}$ setting the restrictions on the vectorized version of the transition probability matrix $\mathbf{p}$.
\texttt{dj} -- a vector of length $Q$ specifying the values of $d_j$ for $j\in\{1,\dots,Q\}$.
\end{tabular}
\end{quote}
\begin{quote}
\begin{tabular}{p{3cm}p{10cm}}
\texttt{starting.values}& A list of objects specifying the starting values of the parameters of the model from which the algorithm is initiated. It consists of the following objects: \texttt{Y}, \texttt{U}, \texttt{xi}, \texttt{Beta}, \texttt{Beta.0}, \texttt{Beta.St}, \texttt{Sigma}, \texttt{PR\_TR}, \texttt{w}, \texttt{S}, \texttt{R}, \texttt{G} that are specified below.
\linebreak
\texttt{Y} -- a $(T+p)\times N$ matrix of data.
\texttt{U} - a $T\times N \times M$ array of state-specific error terms evaluated at the starting values of the parameters.
\texttt{xi} -- a $M\times T$ matrix specifying the realizations of the regimes taking a value of 1 when the regime occurs and 0 if it does not occur in a particular period. This matrix may be filled with values \texttt{NA} for the initialization of the algorithm.
\texttt{Beta} -- a $(1+pN)\times N \times M$ array collecting the regime-dependent and regime-invariant starting values for parameters $B_{s_t}$.
\texttt{Beta.0} -- a $(1+pN)\times N$ matrix of the starting values for the regime-invariant parameters $B_{s_t}$.
\texttt{Beta.St} -- a $(1+pN)\times N \times M$ array collecting the starting values of the regime-dependent parameters $B_{s_t}$.
\texttt{Sigma} -- a $N\times N \times M$ array collecting the regime-dependent and regime-invariant starting values for parameters $\Sigma_{s_t}$.
\texttt{S} -- a $N\times M$ matrix of the starting values for the regime-dependent standard deviations $\sigma_{s_t}$.
\texttt{R} -- a $N\times N \times M$ array collecting the starting values of the regime-dependent correlation matrices $\mathbf{R}_{s_t}$.
\texttt{PR\_TR} -- a $M\times M$ matrix of the starting values for the transition probabilities matrix $\mathbf{P}$.
\texttt{w} -- a $d\times 1$ matrix of the starting values for the unrestricted elements of the transition probabilities matrix $\mathbf{w}$.
\texttt{G} -- a positive integer number specifying the size of the grid, $G$, for the Griddy Gibbs sampler for parameters $\sigma_{s_t}$ and $\mathbf{R}_{s_t}$.
\\
&\\
\texttt{debug}& A logical value set by default to \texttt{FALSE}. If the argument is set to \texttt{TRUE} the algorithm displays information regarding the subsequent steps within each of the iteration corresponding to the groups of parameters being sampled. If the argument is set to \texttt{FALSE} the algorithm does not display such information. \\
&\\
\texttt{print.iterations}& An integer specifying how often should the algorithm's iteration count be displayed.
\end{tabular}
\end{quote}
\bigskip\noindent\textbf{Value}
\begin{quote}
A list with the following arguments:
\smallskip\begin{tabular}{p{3cm}p{10cm}}
\texttt{posteriors} & A list containing draws from the posterior distribution of the parameters. It consists of the following elements \texttt{Beta}, \texttt{Sigma}, \texttt{S}, \texttt{R}, \texttt{PR\_TR}, \texttt{w}, \texttt{S.t}, \texttt{F.S.t}, \texttt{F.Y } that are described below.
\linebreak
\texttt{Beta} -- a $(N+pN^2)\times M \times \mathtt{N}$ array of the posterior draws of parameters $\beta_{s_t}$.
\texttt{Sigma} -- a $N^2\times M \times \mathtt{N}$ array of the posterior draws of parameters $\text{vec}(\Sigma_{s_t})$.
\texttt{S} -- a $N\times M \times \mathtt{N}$ array of the posterior draws of parameters $\sigma_{s_t}$.
\texttt{R} -- a $(N-1)N/2\times M \times \mathtt{N}$ array of the posterior draws of parameters $\text{vecl}(\mathbf{R}_{s_t})$.
\texttt{PR\_TR} -- a $M^2 \times \mathtt{N}$ matrix of the posterior draws of parameters $\mathbf{p}$.
\texttt{w} -- a $d \times \mathtt{N}$ matrix of the posterior draws of parameters $\mathbf{w}$.
\texttt{S.t} -- a $T \times \mathtt{N}$ matrix of the posterior realizations of the Markov process $\mathbf{S}$.
\texttt{F.S.t} -- a $\mathtt{h} \times \mathtt{N}$ matrix of draws of the forecasted states.
\texttt{F.Y } -- a $\mathtt{h} \times N \times \mathtt{N}$ matrix of draws of the forecasted values of $y_t$.
\\
&\\
\texttt{last.draws} & A list containing the draws from the last iteration of the algorithm. This object can be delivered as a starting value for a new run of the MCMC algorithm in order to continue the chain. It consists of the following elements
\texttt{Y}, \texttt{U}, \texttt{xi}, \texttt{Beta}, \texttt{Beta.0}, \texttt{Beta.St}, \texttt{Sigma}, \texttt{PR\_TR}, \texttt{w}, \texttt{S}, \texttt{R}, \texttt{G} which definitions are identical to the elements of object \texttt{starting.values}.
\end{tabular}
\end{quote}
\newpage\noindent\textbf{Authors}
\begin{quote}
Matthieu Droumaguet and Tomasz Wo\'zniak
\end{quote}
\bigskip
\noindent\textbf{References}
\begin{quote}
\noindent Droumaguet, M., Warne, A., Wo\'zniak, T. (in press) Granger Causality and Regime Inference in Markov-Switching VAR Models with Bayesian Methods, \emph{Journal of Applied Econometrics}.
\smallskip\noindent Droumaguet, M., Wo\'zniak, T. (2012). Bayesian Testing of Granger Causality in Markov-Switching VARs, \emph{European University Institute Florence Working Paper Series}, \textbf{2012/06}, Italy.
\end{quote}
%\newpage
\vspace{1.5cm}
\begin{center}
\rule{15cm}{.1pt}\\
\bigskip\begin{tabular}{p{3.5cm} l}
\texttt{G.MHM.MSVAR} & \textit{Marginal Data Density for MS-VAR models}
\end{tabular}
\smallskip
\rule{15cm}{.1pt}
\end{center}
\bigskip\noindent\textbf{Description}
\begin{quote}
Estimates the marginal data density for MS($M$)VAR($p$) models with the estimator of \cite{Geweke1999,Geweke2005}.
\end{quote}
\bigskip\noindent\textbf{Usage}
\begin{quote}
\begin{verbatim}
G.MHM.MSVAR(Gibbs.output, priors, restrictions, alpha, debug=FALSE, print.iterations=100)
\end{verbatim}
\end{quote}
\bigskip\noindent\textbf{Arguments}
\begin{quote}
\begin{tabular}{p{3cm}p{10cm}}
\texttt{Gibbs.output} & A list. An output of the estimation procedure in \texttt{G.MSBVAR.griddy}.\\
&\\
\texttt{priors} & A list. The same as argument \texttt{priors} from \texttt{G.MSBVAR.griddy}. \\
&\\
\texttt{restrictions} & A list. The same as argument \texttt{restrictions} from \texttt{G.MSBVAR.griddy}. \\
&\\
\texttt{alpha} & A scalar of value in $(0,1)$. Truncation probability of the normal distribution from the Modified Harmonic Mean estimator. \\
&\\
\texttt{debug} & A logical value set by default to \texttt{FALSE}. If the argument is set to \texttt{TRUE} the algorithm displays information regarding the subsequent steps within each of the iteration corresponding to the groups of parameters being sampled. If the argument is set to \texttt{FALSE} the algorithm does not display such information. \\
&\\
\texttt{print.iterations} & An integer specifying how often should the algorithm's iteration count be displayed.
\end{tabular}
\end{quote}
\newpage\noindent\textbf{Value}
\begin{quote}
\begin{tabular}{p{3cm}p{10cm}}
\texttt{MHM} & A scalar with the estimate of the natural logarithm of the marginal data density. \\
&\\
\texttt{acceptance.rate} & A scalar with the acceptance rate of the draws from the posterior distribution that are used for the marginal data density estimation. The rejections are due to the truncation rate specific for the Geweke (1999,2005) estimator (with tail probability \texttt{alpha}) as well as due to the intractable values of the likelihood function evaluated at the draws. \\
\texttt{alpha} & A scalar of value in $(0,1)$ reporting the value of the argument \texttt{alpha} of the function.\\
&\\
\texttt{log.likelihood} & A $\mathtt{N}$-vector containing the values of the ordinates of the log-likelihood function at the posterior draws. \\
&\\
\texttt{log.prior} & A $\mathtt{N}$-vector containing the values of the ordinates of the natural logarithm of the prior distribution at the posterior draws. \\
&\\
\texttt{log.truncated.normal} & A $\mathtt{N}$-vector containing the values of the ordinates of the natural logarithm of the truncated normal distribution (with the mean set to the posterior mean of the parameters and the covariance matrix set to the posterior covariance matrix of the parameter vector and truncated symmetrically from both sides with probability \texttt{N}/2) evaluated at the posterior draws.\\
&\\
\texttt{posterior.draws} & A $k\times \mathtt{N}$ matrix with the posterior draws of the parameters. $k$ denotes the overall number of the parameters of the model.
\end{tabular}
\end{quote}
\bigskip\noindent\textbf{Authors}
\begin{quote}
Matthieu Droumaguet and Tomasz Wo\'zniak
\end{quote}
\bigskip\noindent\textbf{References}
\begin{quote}
\noindent Droumaguet, M., Warne, A., Wo\'zniak, T. (in press) Granger Causality and Regime Inference in Markov-Switching VAR Models with Bayesian Methods, \emph{Journal of Applied Econometrics}.
\smallskip\noindent Droumaguet, M., Wo\'zniak, T. (2012). Bayesian Testing of Granger Causality in Markov-Switching VARs, \emph{European University Institute Florence Working Paper Series}, \textbf{2012/06}, Italy.
\smallskip\noindent Geweke, J. (1999). Using Simulation Methods for Bayesian Econometric Models: Inference, Development, and Communication. \emph{Econometric Reviews}, \textbf{18}, 1-73.
\smallskip\noindent Geweke, J. (2005). Contemporary Bayesian Econometrics and Statistics. John Wiley \& Sons, Inc.
\end{quote}
\end{document}