diff --git a/Graph_Theory.pdf b/Graph_Theory.pdf
index b2aa53d..1dbd75f 100644
Binary files a/Graph_Theory.pdf and b/Graph_Theory.pdf differ
diff --git a/ProbAndMeasure/04_product_measures.tex b/ProbAndMeasure/04_product_measures.tex
index 4608bb5..6a30564 100644
--- a/ProbAndMeasure/04_product_measures.tex
+++ b/ProbAndMeasure/04_product_measures.tex
@@ -107,7 +107,7 @@ \subsection{Fubini's theorem}
 \item Let $f \colon E \to \mathbb R$ be a $\mu$-integrable function (on the product measure).
 Let
 \[ A_1 = \qty{x_1 \in E_1 : \int_{E_2} \abs{f(x_1,x_2)} \dd{\mu_2(x_2)} < \infty}. \]
- Define $f_1 : E_1 \to \mathbb{R}$ by $f_1(x_1) = \int_{E_2} f(x_1,x_2) \dd{\mu_2(x_2)}$ on $A_1$ and zero elsewhere. \\
+ Define $f_1 : E_1 \to \mathbb{R}$ by $f_1(x_1) = \int_{E_2} f(x_1,x_2) \dd{\mu_2(x_2)}$ on $A_1$ and 0 elsewhere. \\
 Then $\mu_1(A_1^c) = 0$, $f_1$ is $\mu_1$-integrable and $\mu(f) = \mu_1(f_1) = \mu_1(f_1 1_{A_1})$, and defining $A_2$ symmetrically, $\mu(f) = \mu_2(f_2) = \mu_2(f_2 1_{A_2})$.
 \end{enumerate}
 \end{theorem}
@@ -129,8 +129,9 @@ \subsection{Fubini's theorem}
 So $f_1$ is $\mu_1$-integrable.
 We have $\mu_1(A_1^c) = 0$, otherwise $\mu_1(h) \geq \mu_1(h 1_{A_1^c}) = \infty$ \Lightning.
- Note that $f_1^\pm = \int_{E_2} f^\pm(x_1,x_2) \dd{\mu_2(x_2)}$, and $\mu(f_1) = \mu_1(f_1^+) - \mu_1(f_1^-)$.
- Hence, by the first part, $\mu(f) = \mu(f^+) - \mu(f^-) = \mu_1(f_1^+) - \mu_1(f_1^-) = \mu_1(f_1)$ as required.
+ Setting $f_1^\pm = \int_{E_2} f^\pm(x_1,x_2) \dd{\mu_2(x_2)}$, we see that $f_1 = (f_1^+ - f_1^-) 1_{A_1}$.
+ Also, by the first part, $\mu_1(f_1^+) = \mu(f^+) < \infty$ and $\mu_1(f_1^-) = \mu(f^-) < \infty$.
+ Hence, $\mu(f) =\footnote{As $f$ is integrable.} \mu(f^+) - \mu(f^-) = \mu_1(f_1^+) - \mu_1(f_1^-) =\footnote{As $f_1$ is integrable, due to $\mu_1(A_1^c) = 0$.} \mu_1(f_1)$ as required.
 \end{proof}
 
 \begin{remark}
@@ -138,18 +139,19 @@ \subsection{Fubini's theorem}
 Let $(E_i, \mathcal E_i, \mu_i)$ be measure spaces with $\sigma$-finite measures.
 Note that $(\mathcal E_1 \otimes \mathcal E_2) \otimes \mathcal E_3 = \mathcal E_1 \otimes (\mathcal E_2 \otimes \mathcal E_3)$, by a $\pi$-system argument using Dynkin's lemma.
- So we can iterate the construction of the product measure to obtain a measure $\mu_1 \otimes \dots \mu_n$, which is a unique measure on $\qty(\prod_{i=1}^n E_i \bigotimes_{i=1}^n \mathcal E_i)$ with the property that the measure of a hypercube $\mu(A_1 \times A_n)$ is the product of the measures of its sides $\mu_i(A_i)$.
+ So we can iterate the construction of the product measure to obtain a measure $\mu_1 \otimes \dots \otimes \mu_n$\footnote{This is associative.}, which is the unique measure on $\qty(\prod_{i=1}^n E_i, \bigotimes_{i=1}^n \mathcal E_i)$ with the property that the measure of a hypercube $\mu(A_1 \times \dots \times A_n)$ is the product of the measures of its sides $\mu_i(A_i)$.
 In particular, we have constructed the Lebesgue measure $\mu^n = \bigotimes_{i=1}^n \mu$ on $\mathbb R^n$.
- Applying Fubini's theorem, for functions $f$ that are either nonnegative and measurable or $\mu^n$-integrable, we have
+ Applying Fubini's theorem, for functions $f$ that are either non-negative and measurable or $\mu^n$-integrable, we have
 \[ \int_{\mathbb R^n} f \dd{\mu^n} = \idotsint_{\mathbb R \dots \mathbb R} f(x_1, \dots, x_n) \dd{\mu(x_1)} \dots \dd{\mu(x_n)} \]
 \end{remark}
 
 \subsection{Product probability spaces and independence}
 \begin{proposition}
- Let $(\Omega, \mathcal F, \mathbb P)$, and $(E, \mathcal E) = \qty(\prod_{i=1}^n E_i, \bigotimes_{i=1}^n \mathcal E_i)$.
- Let $X \colon (\Omega, \mathcal F) \to (E, \mathcal E)$ be a measurable function, and define $X(\omega) = (X_1(\omega), X_2(\omega), \dots, X_n(\omega))$.
- Then the following are equivalent.
+ Let $X_1, \dots, X_n$ be r.v.s, $X_i : (\Omega, \mathcal F, \mathbb P) \to (E_i, \mathcal{E}_i)$.
+ Set $(E, \mathcal E) = \qty(\prod_{i=1}^n E_i, \bigotimes_{i=1}^n \mathcal E_i)$.
+ Consider $X \colon (\Omega, \mathcal F, \mathbb{P}) \to (E, \mathcal E)$ given by $X(\omega) = (X_1(\omega), X_2(\omega), \dots, X_n(\omega))$.
+ Then $X$ is $\mathcal{E}$-measurable and the following are equivalent.
 \begin{enumerate}
 \item $X_1, \dots, X_n$ are independent random variables;
 \item $\mu_X = \bigotimes_{i=1}^n \mu_{X_i}$;
@@ -157,24 +159,33 @@ \subsection{Product probability spaces and independence}
 \end{enumerate}
 \end{proposition}
 \begin{proof}
- \emph{(i) implies (ii).}
- Consider the $\pi$-system $\mathcal A$ of rectangles $A = \prod_{i=1}^n A_i$ for $A_i \in \mathcal E_i$.
- Since $\mu_X$ is an image measure,
- Then
- \[ \mu_X(A_1 \times \dots \times A_n) = \prob{X_1 \in A_1, \dots, X_n \in A_n} = \prob{X_1} \dots \prob{A_n} = \prod_{i=1}^n \mu_{X_i}(A_i) \]
- So by uniqueness, the result follows.
+ To show $X$ is measurable, it suffices to check $X\inv(A_1 \times \dots \times A_n) \in \mathcal{F}$ for $A_i \in \mathcal{E}_i \ \forall \; i$, as these rectangles form a $\pi$-system generating $\mathcal{E}$.
+ \begin{align*}
+ X\inv(A_1 \times \dots \times A_n) &= \qty{\omega : X_1(\omega) \in A_1, \dots, X_n(\omega) \in A_n} \\
+ &= \bigcap_{i = 1}^n X_i\inv(A_i).
+ \end{align*}
+ Each $X_i$ is measurable, so $X_i\inv(A_i) \in \mathcal{F}$, and so the intersection is in $\mathcal{F}$.
+
+ (1) $\implies$ (2):
+ Consider the $\pi$-system $\mathcal A$ of rectangles $A = \prod_{i=1}^n A_i$ for $A_i \in \mathcal E_i$; as this generates $\mathcal{E}$, it suffices to check equality on it.
+
+ Since $\mu_X$ is an image measure,
+ \begin{align*}
+ \mu_X(A_1 \times \dots \times A_n) = \prob{X_1 \in A_1, \dots, X_n \in A_n} = \prob{X_1 \in A_1} \dots \prob{X_n \in A_n} &= \prod_{i=1}^n \mu_{X_i}(A_i) \\
+ &= \qty(\bigotimes_{i=1}^n \mu_{X_i})(A).
+ \end{align*}
 
- \emph{(ii) implies (iii).}
+ (2) $\implies$ (3):
 By Fubini's theorem,
 \begin{align*}
 \expect{\prod_{i=1}^n f_i(X_i)} &= \mu_X\qty(\prod_{i=1}^n f_i(x_i)) \\
- &= \int_E f(x) \dd{\mu(x)} \\
+ &= \int_E f(x) \dd{\mu_X(x)} \\
 &= \idotsint_{E_i} \qty(\prod_{i=1}^n f_i(x_i)) \dd{\mu_{X_1}(x_1)} \dots \dd{\mu_{X_n}(x_n)} \\
 &= \prod_{i=1}^n \int_{E_i} f_i(x_i) \dd{\mu_{X_i}(x_i)} \\
 &= \prod_{i=1}^n \expect{f_i(X_i)}
 \end{align*}
 
- \emph{(iii) implies (i).}
+ (3) $\implies$ (1):
 Let $f_i = 1_{A_i}$ for any $A_i \in \mathcal E_i$.
 These are bounded and measurable functions.
 Then
diff --git a/ProbAndMeasure/05_function_spaces_and_norms.tex b/ProbAndMeasure/05_function_spaces_and_norms.tex
new file mode 100644
index 0000000..b33e030
--- /dev/null
+++ b/ProbAndMeasure/05_function_spaces_and_norms.tex
@@ -0,0 +1,443 @@
+\section{$L^p$ Spaces, Norms and Inequalities}
+\subsection{Norms}
+\begin{definition}[Norm]
+ A \vocab{norm} on a real vector space $V$ is a map $\norm{\wildcard}_V \colon V \to [0, \infty)$ s.t.
+ \begin{enumerate}
+ \item $\norm{\lambda v} = \abs{\lambda} \cdot \norm{v}$;
+ \item $\norm{u + v} \leq \norm{u} + \norm{v}$;
+ \item $\norm{v} = 0 \iff v = 0$.
+ \end{enumerate}
+\end{definition}
+
+\begin{definition}
+ Let $(E, \mathcal E, \mu)$ be a measure space.
+ We write $L^p(E,\mathcal E,\mu) = L^p(\mu) = L^p$ for the space of measurable functions $f \colon E \to \mathbb R$ s.t. $\norm{f}_p$ is finite, where
+ \[ \norm{f}_p = \begin{cases}
+ \qty(\int_E \abs{f(x)}^p \dd{\mu(x)})^{\frac{1}{p}} & 1 \leq p < \infty \\
+ \esssup \abs{f} = \inf \qty{\lambda \geq 0 : \abs{f} \leq \lambda \text{ $\mu$-a.e.}} & p = \infty
+ \end{cases} \]
+\end{definition}
+
+We must check that $\norm{\wildcard}_p$ as defined is a norm. \\
+Clearly (1) holds for all $1 \leq p < \infty$ by linearity of the integral, and for $p = \infty$ it is obvious. \\
+Property (2) holds for $p = 1$ and $p = \infty$, and we will prove later that it holds for the other values of $p$ by Minkowski's inequality. \\
+The last property does not hold: $f = 0$ implies $\norm{f}_p = 0$, but $\norm{f}_p = 0$ implies only that $\abs{f}^p = 0$ a.e., so $f$ is zero a.e. on $E$.
+
+Therefore, to rigorously define the norm, we must construct the quotient space $\mathcal L^p$ of functions that coincide a.e..
+We write $[f]$ for the equivalence class of functions that are equal a.e.
+The functional $\norm{\wildcard}_p$ is then a norm on $\mathcal L^p = \qty{[f] : f \in L^p}$.
+
+\begin{proposition}[Chebyshev's inequality, Markov's inequality]
+ Let $f \colon E \to \mathbb R$ be non-negative and measurable.
+ Then $\forall \; \lambda > 0$,
+ \[ \mu(\qty{x \in E : f(x) \geq \lambda}) = \mu(f \geq \lambda) \leq \frac{\mu(f)}{\lambda} \]
+\end{proposition}
+
+\begin{proof}
+ Integrate the inequality $\lambda 1_{\qty{f \geq \lambda}} \leq f$, which holds on $E$.
+\end{proof}
+
+In particular, if $g \in L^p$, $p < \infty$ and $\lambda > 0$, then $\mu(\abs{g} \geq \lambda) = \mu(\abs{g}^p \geq \lambda^p) \leq \frac{\mu(\abs{g}^p)}{\lambda^p} < \infty$; this gives tail estimates as $\lambda \to \infty$.
+
+\begin{definition}[Convex Function]
+ Let $I \subseteq \mathbb R$ be an interval.
+ Then we say a map $c \colon I \to \mathbb R$ is \vocab{convex} if for all $x, y \in I$ and $t \in [0,1]$, we have $c(tx + (1-t)y) \leq tc(x) + (1-t)c(y)$.
+ Equivalently, for all $x, y \in I$ with $x < t < y$, we have $\frac{c(t) - c(x)}{t-x} \leq \frac{c(y) - c(t)}{y-t}$.
+\end{definition}
+
+Thus a convex function is continuous on the interior of the interval and so is Borel measurable.
+
+\begin{lemma}
+ Let $I \subseteq \mathbb R$ be an interval, let $c : I \to \mathbb{R}$, and let $m$ be in the interior of $I$.
+ If $c$ is convex on $I$, $\exists \; a, b$ s.t. $c(x) \geq ax + b \ \forall \; x \in I$, and $c(m) = am + b$.
+\end{lemma}
+
+\begin{proof}
+ Define $a = \sup \qty{\frac{c(m) - c(x)}{m - x} : x < m, x \in I}$.
+ This exists in $\mathbb R$ by the second definition of convexity.
+ Let $x, y \in I$, and $y > m > x$.
+ Then $\frac{c(m) - c(x)}{m - x} \leq a \leq \frac{c(y) - c(m)}{y - m}$, so $c(y) \geq ay - am + c(m) = ay + b$ where we define $b = c(m) - am$.
+ Similarly, for $x$, we have $c(x) \geq ax + b$.
+\end{proof}
+
+\begin{theorem}[Jensen's inequality]
+ Let $X$ be an integrable\footnote{$\expect{\abs{X}} < \infty$} r.v. taking values in an interval $I \subseteq \mathbb R$.
+ Let $c \colon I \to \mathbb R$ be a convex function.
+ Then $\expect{c(X)}$ is well-defined and
+ \begin{align*}
+ c(\expect{X}) \leq \expect{c(X)}.
+ \end{align*}
+\end{theorem}
+
+% Note that the integral $\expect{c(X)}$ is defined as $\expect{c^+(X)} - \expect{c^-(X)}$, and this is well-defined and takes values in $(-\infty, \infty]$.
+
+\begin{proof}
+ If $X$ is constant a.s., we are done.
+
+ Otherwise, $m = \mathbb{E}[X] \in \operatorname{int}\footnote{Interior of.} I$.
+
+ Using the previous lemma, $\exists \; a, b$ s.t. $c(X) \geq aX + b$.
+ In particular, $(c(X))^- \leq |a| |X| + |b|$\footnote{$f \geq g$ gives $-f \leq -g$ so $f^- \leq g^- \leq |g|$.}.
+ Hence, $\expect{c^-(X)} \leq \abs{a} \expect{\abs{X}} + \abs{b} < \infty$, and $\expect{c(X)} = \expect{c^+(X)} - \expect{c^-(X)}$ is well-defined in $(-\infty,\infty]$.
+
+ Integrating $c(X) \geq aX + b$\footnote{If $\mathbb{E}[c(X)] = \infty$, we are done.},
+ \[ \expect{c(X)} \geq a \expect{X} + b\footnote{$\expect{1} = \prob{\Omega} = 1$; this is why we need a probability measure.} = am + b = c(m) = c(\expect{X}) \]
+\end{proof}
+
+\begin{example}
+ $(\Omega, \mathcal{F}, \mathbb{P})$ and $1 \leq p \leq \infty$.
+ If $X \in L^\infty(\mathbb{P})$ then $X \in L^p(\mathbb{P})$, since $\norm{X}_p \leq \norm{X}_\infty$ as $\mathbb{P}(\Omega) = 1$ is finite.
+\end{example}
+
+\begin{example}
+ If $1 \leq p < q < \infty$, $c(x) = \abs{x}^{\frac{q}{p}}$ is a convex function.
+ If $X$ is a bounded r.v. (so lies in $L^\infty(\mathbb P)$), we then have
+ \[ \norm{X}_p = \expect{\abs{X}^p}^{\frac{1}{p}} = c(\expect{\abs{X}^p})^{\frac{1}{q}} \mathcolor{red}{\leq}\footnote{By Jensen's inequality.} \expect{c(\abs{X}^p)}^{\frac{1}{q}} = \norm{X}_q \]
+ Using the monotone convergence theorem, this extends to all $X \in L^q(\mathbb P)$ when $\norm{X}_q$ is finite.
+ So $X \in L^q \implies X \in L^p$, so $L^q(\mathbb P) \subseteq L^p(\mathbb P)$ for all $1 \leq p \leq q \leq \infty$.
+\end{example}
+
+\begin{theorem}[H\"older's inequality]
+ Let $f, g$ be measurable functions on $(E,\mathcal E,\mu)$.
+ If $p, q$ are \vocab{conjugate}, so $\frac{1}{p} + \frac{1}{q} = 1$ and $1 \leq p \leq q \leq \infty$, we have
+ \[ \mu(\abs{fg}) = \int_E \abs{f(x)g(x)} \dd{\mu} \leq \norm{f}_p \cdot \norm{g}_q \]
+\end{theorem}
+
+\begin{remark}
+ For $p = q = 2$, this is exactly the Cauchy--Schwarz inequality on $L^2$ (simpler proof on Sheet 3 by considering $\int (f+tg)^2 \dd{\mu} \geq 0$).
+\end{remark}
+
+\begin{proof}
+ The cases $p = 1$ or $p = \infty$ are obvious.
+ We can assume $f \in L^p$ and $g \in L^q$ wlog since the right hand side would otherwise be infinite.
+ We can also assume $f$ is not equal to zero a.e., otherwise this reduces to $0 \leq 0$.
+
+ Hence, $\norm{f}_p > 0$.
+ Then, we can divide both sides by $\norm{f}_p$ and so assume $\norm{f}_p = 1$.
+
+ Define a probability measure $\mathbb{P}$ on $\mathcal{E}$ by $\mathbb{P}(A) = \int_A |f|^p \dd{\mu}$ ($\mathbb{P}$ has probability density $|f|^p$ wrt $\mu$).
+ Note that for $h \geq 0$, $\int h \dd{\mathbb{P}} = \int h |f|^p \dd{\mu}$.
+
+ Then,
+ \begin{align*}
+ \mu(|fg|) &= \mu(|fg| 1_{|f| > 0}) \\
+ &= \int |f| |g| 1_{|f| > 0} \dd{\mu} \\
+ &= \int \frac{|f|^p}{|f|^{p-1}} |g| 1_{|f| > 0} \dd{\mu} \\
+ &= \int \frac{|g|}{|f|^{p-1}} 1_{|f| > 0} |f|^p \dd{\mu} \\
+ &= \int \frac{|g|}{|f|^{p-1}} 1_{|f| > 0} \dd{\mathbb{P}} \\
+ &= \mathbb{E}\qty[\frac{|g|}{|f|^{p-1}} 1_{|f| > 0}] \\
+ &\leq \mathbb{E}\qty[\qty(\frac{|g|}{|f|^{p-1}} 1_{|f| > 0})^q]^{\frac{1}{q}}\footnote{Proven earlier via Jensen's inequality: $\norm{X}_p \leq \norm{X}_q$ for $1 \leq p \leq q$; here with $p = 1$.} \\
+ &= \mathbb{E}\qty[\frac{|g|^q}{|f|^p} 1_{|f| > 0}]^{\frac{1}{q}} \\
+ &= \qty(\int \frac{|g|^q}{|f|^p} 1_{|f| > 0} \dd{\mathbb{P}})^{\frac{1}{q}} \\
+ &= \qty(\int |g|^q 1_{|f| > 0} \dd{\mu})^{\frac{1}{q}} \\
+ &\leq \qty(\int |g|^q \dd{\mu})^{\frac{1}{q}} \\
+ &= \norm{g}_q
+ \end{align*}
+\end{proof}
+
+\begin{theorem}[Minkowski's inequality]
+ Let $f, g \colon (E, \mathcal E, \mu) \to \mathbb R$ be measurable functions.
+ Then for all $1 \leq p \leq \infty$, we have $\norm{f + g}_p \leq \norm{f}_p + \norm{g}_p$.
+\end{theorem}
+
+\begin{proof}
+ The results for $p = 1, \infty$ are clear.
+ Suppose $1 < p < \infty$.
+ We can assume wlog that $f, g \in L^p$.
+
+ We can integrate the pointwise inequality $\abs{f + g}^p \leq 2^p (\abs{f}^p + \abs{g}^p)$ to deduce that $\mu(|f+g|^p) \leq 2^p \qty[\mu(|f|^p) + \mu(|g|^p)] < \infty$, so $f + g \in L^p$.
+ We assume that $0 < \norm{f+g}_p$, otherwise the result is trivial.
+ Now, using H\"older's inequality with $q$ conjugate to $p$,
+ \begin{align*}
+ \norm{f+g}_p^p = \int \abs{f + g}^p \dd{\mu} &= \int \abs{f + g}^{p-1} \abs{f + g} \dd{\mu} \\
+ &\leq \int \abs{f + g}^{p-1} \abs{f} \dd{\mu} + \int \abs{f + g}^{p-1} \abs{g} \dd{\mu} \\
+ &\leq\footnote{By H\"older's inequality.} \norm{f}_p \norm{(f+g)^{p-1}}_q + \norm{g}_p \norm{(f+g)^{p-1}}_q \\
+ &= \qty(\int \abs{f + g}^{q(p-1)} \dd{\mu})^{\frac{1}{q}} \qty(\norm{f}_p + \norm{g}_p) \\
+ &= \qty(\int \abs{f + g}^p \dd{\mu})^{\frac{1}{q}} \qty(\norm{f}_p + \norm{g}_p) \\
+ &= \norm{f+g}_p^{\frac{p}{q}} \qty(\norm{f}_p + \norm{g}_p)
+ \end{align*}
+ Dividing both sides by $\norm{f+g}_p^{\frac{p}{q}}$ and noting $q(p-1) = p$ and $p - \frac{p}{q} = 1$, we obtain $\norm{f+g}_p \leq \norm{f}_p + \norm{g}_p$.
+\end{proof}
+
+So the $L^p$ spaces are indeed normed spaces.
+
+\subsection{Banach spaces}
+
+\begin{definition}[Banach Space]
+ A \vocab{Banach space} is a complete normed vector space.
+\end{definition}
+
+\begin{theorem}[$\mathcal L^p$ is a Banach space]
+ Let $1 \leq p \leq \infty$, and let $f_n \in L^p$ be a Cauchy sequence, so $\forall \; \varepsilon > 0 \ \exists \; N$ s.t. $\forall \; m, n \geq N$, we have $\norm{f_m - f_n}_p < \varepsilon$.
+ Then $\exists \; f \in L^p$ s.t. $f_n \to f$ in $L^p$, so $\norm{f_n - f}_p \to 0$ as $n \to \infty$.
+\end{theorem}
+
+\begin{proof}
+ For this proof, we assume $p < \infty$; the other case is already proven in IB Analysis and Topology.
+
+ Since $(f_n)$ is Cauchy, using $\varepsilon = 2^{-k}$ we extract a subsequence $(f_{N_k})$ of $L^p$ functions s.t.
+ \[ S = \sum_{k=1}^\infty \norm{f_{N_{k+1}} - f_{N_k}}_p \leq \sum_{k=1}^\infty 2^{-k} < \infty \]
+ By Minkowski's inequality, for any $K$, we have
+ \[ \norm{\sum_{k=1}^K \abs{f_{N_{k+1}} - f_{N_k}}}_p \leq \sum_{k=1}^K \norm{f_{N_{k+1}} - f_{N_k}}_p \leq S < \infty. \]
+ So $\int \abs{\sum_{k=1}^K \abs{f_{N_{k+1}} - f_{N_k}}}^p \dd{\mu} \leq S^p < \infty$.
+
+ By the monotone convergence theorem applied to $\abs{\sum_{k=1}^K \abs{f_{N_{k+1}} - f_{N_k}}}^p$, which increases to $\abs{\sum_{k=1}^\infty \abs{f_{N_{k+1}} - f_{N_k}}}^p$, we find
+ \[ \norm{\sum_{k=1}^\infty \abs{f_{N_{k+1}} - f_{N_k}}}_p \leq S < \infty \]
+ Since the integral is finite, we see that $\sum_{k=1}^\infty \abs{f_{N_{k+1}} - f_{N_k}}$ is finite $\mu$-a.e..
+ Let $A$ be the set where this sum is finite, then $\mu(A^c) = 0$.
+ For any $x \in A$, $(f_{N_k}(x))$ is Cauchy (as the tail sums of the convergent series vanish), and since $\mathbb{R}$ is complete it converges.
+ Define
+ \[ f(x) = \begin{cases}
+ \lim_{k \to \infty} f_{N_k}(x) & x \in A \\
+ 0 & x \in A^c
+ \end{cases} \]
+ so $f_{N_k} \to f$ as $k \to \infty$ $\mu$-a.e. and $f$ is measurable as the limit of measurable fcns.
+
+ Now, given $\varepsilon > 0$, choose $N$ s.t. $\norm{f_n - f_m}_p < \varepsilon \ \forall \; m, n \geq N$.
+ By Fatou's lemma,
+ \begin{align*}
+ \norm{f_n - f}_p^p &= \mu(\abs{f_n - f}^p) \\
+ &= \mu(\lim_k \abs{f_n - f_{N_k}}^p) \\
+ &= \mu(\liminf_k \abs{f_n - f_{N_k}}^p) \\
+ &\leq \liminf_k \mu(\abs{f_n - f_{N_k}}^p) \\
+ &\leq \varepsilon^p \quad \forall \; n \geq N.
+ \end{align*}
+
+ Since the $f_n$ are Cauchy,
+ \[ \norm{f}_p \leq \underbrace{\norm{f - f_N}_p}_{\leq \varepsilon} + \underbrace{\norm{f_N}_p}_{< \infty} < \infty \]
+ so $f \in L^p$ and so $f_n \to f$ in $L^p$.
+\end{proof}
+
+\begin{remark}
+ If $V$ is any of the spaces
+ \[ C([0, 1]);\quad\qty{f \text{ simple}};\quad\qty{f \text{ a finite linear combination of indicators of intervals}} \]
+ then $V$ is dense in $L^p((0, 1), \mathcal{B}, \lambda)$.
+ So the completion $\overline{(V,\norm{\wildcard}_p)}$ is exactly $L^p(\lambda)$ (proof on Sheet 3: first prove it for finite linear combinations of indicators of intervals using the monotone class theorem, then approximate continuous fcns by such combinations).
+\end{remark}
+
+\subsection{Hilbert spaces}
+
+\begin{definition}[Inner Product]
+ A symmetric bilinear form $\inner{\wildcard, \wildcard} \colon V \times V \to \mathbb R$ on a real vector space $V$ is called an \vocab{inner product} if $\inner{v,v} \geq 0$ and $\inner{v,v} = 0 \iff v = 0$. \\
+ In this case, we can define a norm\footnote{Cauchy--Schwarz gives the triangle inequality.} $\norm{v} = \sqrt{\inner{v,v}}$.
+\end{definition}
+
+\begin{definition}[Hilbert Space]
+ If $(V,\inner{\wildcard,\wildcard})$ is complete, we say that it is a \vocab{Hilbert space}.
+\end{definition}
+
+\begin{corollary}
+ The space $\mathcal L^2$ is a Hilbert space for the inner product $\inner{f,g} = \int_E fg \dd{\mu}$.
+\end{corollary}
+
+\begin{example}
+ An analog of the Pythagorean theorem holds.
+ Let $f, g \in L^2$; then $\norm{f + g}_2^2 = \norm{f}_2^2 + 2\inner{f,g} + \norm{g}_2^2$.
+\end{example}
+
+\begin{example}
+ The parallelogram identity holds: $\norm{f+g}_2^2 + \norm{f-g}_2^2 = 2 \qty(\norm{f}_2^2 + \norm{g}_2^2)$
+\end{example}
+
+\begin{definition}[Orthogonal]
+ We say $f$ is \vocab{orthogonal} to $g$ if $\inner{f,g} = 0$.
+\end{definition}
+
+\begin{remark}
+ $f$ and $g$ are orthogonal iff $\norm{f + g}_2^2 = \norm{f}_2^2 + \norm{g}_2^2$.
+
+ For centred (mean zero) r.v.s $X, Y$, we have $\inner{X,Y} = \expect{XY} = \expect{(X - \expect{X})(Y - \expect{Y})} = \Cov{X,Y}$, which vanishes exactly when $X$ and $Y$ are orthogonal.
+\end{remark}
+
+\begin{definition}[Orthogonal Complement]
+ Let $V \subseteq L^2(\mu)$.
+ We define its \vocab{orthogonal complement} to be
+ \[ V^\perp = \qty{f \in L^2(\mu) : \inner{f,g} = 0 \quad \forall g \in V} \]
+\end{definition}
+
+\begin{definition}[Closed Set]
+ We say that a subset $V$ of $\mathcal L^2$ is \vocab{closed} if, whenever a sequence $f_n \in V$ converges in $\mathcal L^2$, its limit $f$ coincides a.e. with some $v \in V$.
+\end{definition}
+
+\begin{theorem}[Orthogonal Projection]
+ Let $V$ be a \underline{closed linear subspace} of $\mathcal L^2(\mu)$.
+ Then $\forall \; f \in \mathcal L^2$, $\exists$ an orthogonal decomposition $f = v + u$ where $v \in V$ and $u \in V^\perp$. \\
+ Moreover, $\norm{f - v}_2 \leq \norm{f - g}_2 \ \forall \; g \in V$ with equality iff $v = g$ a.e..
+\end{theorem}
+
+\begin{definition}[Projection]
+ We call $v$ the \vocab{projection} of $f$ onto $V$.
+\end{definition}
+
+\begin{proof}
+ In this proof, all norms are $\norm{\wildcard}_2$.
+ We define $d(f,V) = \inf_{g \in V} \norm{g - f}$, and let $g_n \in V$ be a sequence of functions s.t. $\norm{g_n - f} \to d(f,V)$. \\
+ By the parallelogram law,
+ \begin{align*}
+ 2\norm{f - g_n}^2 + 2\norm{f - g_m}^2 &= \norm{2f - (g_n + g_m)}^2 + \norm{g_n - g_m}^2 \\
+ &= 4 \norm{f - \underbrace{\frac{g_n + g_m}{2}}_{\in V}}^2 + \norm{g_n - g_m}^2 \\
+ &\geq 4 d(f,V)^2 + \norm{g_n - g_m}^2
+ \end{align*}
+ Rearranging, $\norm{g_n - g_m}^2 \leq 2\norm{f - g_n}^2 + 2\norm{f - g_m}^2 - 4d(f,V)^2 \to 0$ as $n, m \to \infty$.
+ So the sequence $g_n$ is Cauchy in $L^2$, so by completeness, it converges to some $v \in L^2$.
+ Since $V$ is closed, $v \in V$.
+ In particular, $d(f,V) = \inf_{g \in V} \norm{g - f} = \norm{v - f}$.
+
+ Note that $d(f,V)^2 \leq F(t) = \norm{f - (v+th)}^2 = d(f, V)^2 - 2t \inner{f-v, h} + t^2 \norm{h}^2$ where $t \in \mathbb R$ and $h \in V$, so $0 \leq -2t \inner{f-v, h} + t^2 \norm{h}^2$.
+ Letting $t \downarrow 0$ and $t \uparrow 0$, we obtain $\inner{f - v, h} = 0$ for all $h \in V$.
+ Defining $f - v = u$, we have $f = u + v$ and $u \in V^\perp$ since $h$ was arbitrary.
+
+ For any $g \in V$, $f - g = \underbrace{f-v}_{\in V^\perp} + \underbrace{v - g}_{\in V}$.
+ So $\norm{f - g}^2 = \norm{f - v}^2 + \norm{v - g}^2$, hence $\norm{f - g} \geq \norm{f - v}$ with equality iff $\norm{v - g} = 0$, i.e. $v = g$ a.e..
+
+ % For uniqueness, suppose $f = w + z$ with $w \in V$ and $z \in V^\perp$.
+ % Then $v - w + u - z = f - f = 0$, so taking norms, $0 = \norm{v - w + u - z}^2 = \norm{v - w}^2 + \norm{u - z}^2$ so $v = w$ and $u = z$ (a.e.) by orthogonality.
+\end{proof}
+
+\subsection{Conditional Expectation}
+
+% \begin{definition}[Sub-$\sigma$ algebra]
+
+% \end{definition}
+
+If $\mathcal{G}$ is a sub-$\sigma$-algebra of $\mathcal{F}$ (i.e. $\mathcal{G} \subseteq \mathcal{F}$), then $L^2(\Omega, \mathcal{G}, \mathbb{P})$ is a closed subspace of $L^2(\Omega, \mathcal{F}, \mathbb{P})$.
+
+\begin{definition}[Conditional Expectation]
+ For $X \in L^2(\Omega, \mathcal{F}, \mathbb{P})$, the\footnote{A variant of the} \vocab{conditional expectation of $X$ given $\mathcal{G}$}, $\mathbb{E}[X \mid \mathcal{G}]$, is defined as the orthogonal projection of $X$ onto $L^2(\Omega, \mathcal{G}, \mathbb{P})$.\\
+ In particular, $\norm{X - Y}_2 \geq \norm{X - \mathbb{E}[X \mid \mathcal{G}]}_2$ for all $\mathcal{G}$-measurable $Y \in L^2(\Omega, \mathcal{G}, \mathbb{P})$.
+\end{definition}
+
+\begin{question}
+ How does one define $\mathbb{E}[X \mid \mathcal{G}]$ if $X \in L^1(\Omega, \mathcal{F}, \mathbb{P})$? See Advanced Probability.
+\end{question}
+
+\begin{example}
+ Let $(G_i)_{i \in I}$ be a countable family of disjoint events whose union is $\Omega$ and set $\mathcal{G} = \sigma(G_i : i \in I)$.
+ Let $X$ be integrable.
+ Define, for each $i \in I$,
+ \begin{align*}
+ \mathbb{E}[X \mid G_i] = \frac{\mathbb{E}[X 1_{G_i}]}{\mathbb{P}(G_i)}
+ \end{align*}
+ (and set it to $0$ if $\mathbb{P}(G_i) = 0$).
+ Let $Y = \sum_i \mathbb{E}[X \mid G_i] 1_{G_i}$ (i.e. if $\omega \in G_i$, $Y(\omega) = \mathbb{E}[X \mid G_i]$).
+ Check that $Y$ is $\mathcal{G}$-measurable; $Y \in L^2(\Omega, \mathcal{G}, \mathbb{P})$; and that $Y$ is ``the'' orthogonal projection of $X$ onto $L^2(\Omega, \mathcal{G}, \mathbb{P})$ if $X \in L^2(\Omega, \mathcal{F}, \mathbb{P})$.
+\end{example}
+
+\subsection{$L^p$ Convergence and Uniform Integrability}
+
+For $(\Omega, \mathcal{F}, \mathbb{P})$, what are the implications between the modes of convergence: a.s., in $L^p$ for $1 \leq p < \infty$, in $\mathbb{P}$, and in distribution?
+
+Let $f_n = n 1_{(0, 1/n)}$ on $\qty((0, 1), \mathcal{B}, \lambda)$.
+$f_n \to 0$ a.s. but $\mathbb{E}\abs{f_n} = \mathbb{E}[f_n] = 1 \ \forall \; n$, so a.s. $\centernot\implies$ $L^p$ convergence.
+
+$\mathbb{P}(|X_n - X| > \epsilon) \leq \frac{\mathbb{E}|X_n - X|^p}{\epsilon^p}$ by Markov's inequality, so convergence in $L^p$ for $1 \leq p < \infty \implies$ convergence in $\mathbb{P}$.
+
+\begin{theorem}[Dominated Convergence Theorem]
+ Let $X_n$ be r.v.s on $(\Omega, \mathcal F, \mathbb P)$ s.t. $\abs{X_n} \leq Y$ for an integrable r.v. $Y$, and suppose the $X_n$ converge in $\mathbb{P}$ to $X$.
+ Then $X_n \to X$ in $L^1(\mathbb P)$, i.e. $\mathbb{E}|X_n - X| \to 0$.
+\end{theorem}
+
+\begin{question}
+ What is the ``minimum condition'' on $(X_n)$ under which $X_n \to X$ in $\mathbb{P}$ implies $X_n \to X$ in $L^1(\mathbb{P})$?
+\end{question}
+
+\begin{answer}
+ Uniform integrability.
+\end{answer}
+
+% \begin{proof}
+% We know that $X_{n_k} \to X$ almost surely along a subsequence $n_k$.
+% So $\abs{X} = \lim_k \abs{X_{n_k}} \leq C < \infty$ almost surely.
+% Then
+% \begin{align*}
+% \expect{\abs{X_n - X}} &= \expect{\abs{X_n - X} \qty(1_{\qty{\abs{X_n - X} > \frac \varepsilon 2}} + 1_{\qty{\abs{X_n - x} \leq \frac \varepsilon 2}})} \\
+% &\leq 2 C \prob{\abs{X_n - X} \geq \frac{\varepsilon}{2}} + \frac \varepsilon 2 \\
+% &< \varepsilon
+% \end{align*}
+% for sufficiently large $n$.
+% \end{proof}
+
+For $X \in L^1(\mathbb P)$, as $\delta \to 0$,
+\[ I_X(\delta) = \sup \qty{ \expect{\abs{X} 1_A} : \prob{A} \leq \delta, A \in \mathcal{F}} \to 0 \]
+If not, $\exists \; \varepsilon > 0$ and $A_n \in \mathcal F$ s.t. $\prob{A_n} \leq 2^{-n}$ but $\expect{\abs{X} 1_{A_n}} \geq \varepsilon$.
+Since $\sum_n \prob{A_n} < \infty$, by the first Borel--Cantelli lemma, we have $\prob{\bigcap_n \bigcup_{m \geq n} A_m} = 0$.
+But $\expect{\abs{X} 1_{A_n}} \leq \expect{\abs{X} 1_{\bigcup_{m \geq n} A_m}}$.
+Note that $1_{\bigcup_{m \geq n} A_m} \to 1_{\bigcap_n \bigcup_{m \geq n} A_m} = 0$ a.s., so $\expect{\abs{X} 1_{\bigcup_{m \geq n} A_m}} \to \expect{\abs{X} 1_{\bigcap_n \bigcup_{m \geq n} A_m}} = 0$ by the DCT \Lightning.
+
+\begin{definition}[Uniformly Integrable]
+ For a collection $\mathcal X \subseteq L^1(\mathbb P)$ of r.v.s, we say $\mathcal X$ is \vocab{uniformly integrable (UI)} if it is bounded in $L^1(\mathbb P)$\footnote{I.e. $\sup_{X \in \mathcal{X}} \norm{X}_1 = \sup_{X \in \mathcal{X}} \expect{\abs{X}} = I_{\mathcal{X}}(1) < \infty$.}, and
+ \[ I_{\mathcal X}(\delta) = \sup \qty{ \expect{\abs{X}1_A} : \prob{A} \leq \delta, X \in \mathcal X} \to 0 \text{ as } \delta \to 0.\]
+\end{definition}
+
+\begin{remark}
+ \begin{enumerate}
+ \item Any single integrable r.v. is UI.
+ This is also true for any finite collection of integrable r.v.s.
+ Also, for a fixed $Y \in L^1$, the collection $\mathcal{X} = \qty{X : X \text{ a r.v. s.t. } |X| \leq Y}$ is UI, as $\sup_{X \in \mathcal{X}} \mathbb{E}[|X| 1_A] \leq \mathbb{E}[Y 1_A]$ implies $I_{\mathcal{X}}(\delta) \leq I_Y(\delta) \to 0$ as $\delta \to 0$.
+ \item If $\mathcal X$ is bounded in $L^p(\mathbb P)$ for $p > 1$, then $\mathcal X$ is UI: by H\"older's inequality with $q$ conjugate to $p$,
+ \[ \expect{\abs{X}1_A} \leq \underbrace{\norm{X}_p}_{\text{bounded}} \cdot \underbrace{\prob{A}^{\frac 1 q}}_{\leq \delta^{\frac 1 q} \to 0} \]
+ \end{enumerate}
+\end{remark}
+
+% \begin{remark}
+% Note that $X_n = n1_{\qty[0,\frac{1}{n}]}$ for the Lebesgue measure $\mu$ on $[0,1]$ is bounded in $L^1(\mathbb P)$ but not uniformly integrable.
+% \end{remark}
+
+\begin{lemma}
+ $\mathcal X \subseteq L^1(\mathbb P)$ is UI $\iff \sup_{X \in \mathcal X} \expect{\abs{X} 1_{\qty{\abs{X} > K}}} \to 0$ as $K \to \infty$.
+\end{lemma}
+
+\begin{proof}
+ $(\implies)$: Applying Markov's inequality, as $K \to \infty$,
+ \[ \prob{\abs{X} > K} \leq \frac{\expect{\abs{X}}}{K} = \frac{\expect{\abs{X}1_{\Omega}}}{K} \leq \frac{I_{\mathcal X}(1)}{K} \to 0 \]
+ uniformly in $X \in \mathcal X$.
+ Applying the uniform integrability property with $A = \qty{\abs{X} > K}$, we obtain the required limit.
+
+ $(\Longleftarrow)$:
+ Let $\varepsilon > 0$ and choose $K$ s.t. $\sup_{X \in \mathcal X} \expect{\abs{X} 1_{\qty{\abs{X} > K}}} < \frac{\varepsilon}{2}$.
+ Then
+ \[ \expect{\abs{X}} = \expect{\abs{X}\qty(1_{\qty{\abs{X} \leq K}} + 1_{\qty{\abs{X} > K}})} \leq K + \frac{\varepsilon}{2} \]
+ so $\mathcal X$ is bounded in $L^1(\mathbb P)$ as required.
+ Then for $A$ s.t. $\prob{A} \leq \delta$,
+ \[ \expect{\abs{X}1_A} = \expect{\abs{X}1_A\qty(1_{\qty{\abs{X} \leq K}} + 1_{\qty{\abs{X} > K}})} \leq K\prob{A} + \expect{\abs{X}1_{\qty{\abs{X} > K}}} \leq K\delta + \frac{\varepsilon}{2} < \varepsilon \]
+ for sufficiently small $\delta$.
+\end{proof}
+
+\begin{theorem}
+ Let $X_n, X$ be r.v.s on $(\Omega, \mathcal F, \mathbb P)$.
+ Then the following are equivalent.
+ \begin{enumerate}
+ \item $X_n, X \in L^1(\mathbb P)$ and $X_n \to X$ in $L^1(\mathbb P)$.
+ \item $\qty{X_n : n \in \mathbb N}$ is uniformly integrable, and $X_n \to X$ in $\mathbb{P}$.
+ \end{enumerate}
+\end{theorem}
+
+\begin{proof}
+ (1) $\implies$ (2):
+ Using Markov's inequality,
+ \[ \prob{\abs{X_n - X} > \varepsilon} \leq \frac{\expect{\abs{X_n - X}}}{\varepsilon} \to 0 \]
+ so $X_n \to X$ in $\mathbb{P}$.
+
+ For uniform integrability, choose $N$ s.t. $\mathbb{E}|X_n - X| < \frac{\varepsilon}{2} \ \forall \; n \geq N$.
+ The finite family $\{X_1, \dots, X_{N-1}, X\}$ is UI, so choose $\delta$ s.t. $\mathbb{P}(A) < \delta$ implies $\mathbb{E}[|X| 1_A] \leq \frac{\varepsilon}{2}$ and $\mathbb{E}[|X_n|1_A] \leq \varepsilon \quad \forall \; n = 1, \dots, N-1$.
+ Then for $n \geq N$,
+ \begin{align*}
+ \expect{\abs{X_n} 1_A} \leq \expect{\abs{X_n - X} 1_A} + \expect{\abs{X} 1_A} \leq \frac{\varepsilon}{2} + \frac{\varepsilon}{2}
+ \end{align*}
+ So $\qty{X_n : n \in \mathbb N}$ is UI.
+
+ (2) $\implies$ (1):
+ $X_n \to X$ in $\mathbb{P}$, so take a subsequence $n_k$ s.t. $X_{n_k} \to X$ a.s..
+ Then,
+ \begin{align*}
+ \expect{\abs{X}} = \expect{\liminf_k \abs{X_{n_k}}} \leq\footnote{Fatou's lemma.} \liminf_k \expect{\abs{X_{n_k}}} \leq I_{\mathcal X}(1) <\footnote{As $\mathcal{X}$ is UI, hence $L^1$ bounded.} \infty,
+ \end{align*}
+ so $X \in L^1(\mathbb P)$.
+
+ Next, we define truncated r.v.s $X_n^K = \max(-K, \min(K, X_n))$ and $X^K = \max(-K, \min(K, X))$.
+ Then $X_n^K \to X^K$ in $\mathbb{P}$ (as $\mathbb{P}(|X_n^K - X^K| > \epsilon) \leq \mathbb{P}(|X_n - X| > \epsilon)$)\footnote{Aside: if $X_n \to X$ in $\mathbb{P}$ and $f$ is cts, then $f(X_n) \to f(X)$ in $\mathbb{P}$.}.
+ Also, $|X_n^K| \leq K \quad \forall \; n$, so by the BCT, $X_n^K \to X^K$ in $L^1$.
+ Now,
+ \begin{align*}
+ \expect{\abs{X_n - X}} &\leq \expect{\abs{X_n - X_n^K}} + \expect{\abs{X_n^K - X^K}} + \expect{\abs{X^K - X}} \\
+ &\leq \expect{\abs{X_n} 1_{\qty{\abs{X_n} > K}}} + \expect{\abs{X_n^K - X^K}} + \expect{\abs{X}1_{\qty{\abs{X} > K}}} \\
+ &< \varepsilon
+ \end{align*}
+ by choosing sufficiently large $K$ (by UI) and $n$.
+\end{proof}
\ No newline at end of file
diff --git a/ProbAndMeasure/06_fourier_analysis.tex b/ProbAndMeasure/06_fourier_analysis.tex
new file mode 100644
index 0000000..f16b34a
--- /dev/null
+++ b/ProbAndMeasure/06_fourier_analysis.tex
@@ -0,0 +1,403 @@
+\section{Fourier transforms}
+\subsection{Fourier transforms}
+In this section, we will write $L^p = L^p(\mathbb R^d)$ for the space of \underline{complex-valued} Borel measurable fcns on $\mathbb{R}^d$, i.e. $f \colon \mathbb R^d \to \mathbb C$ s.t. $\norm{f}_p = \qty(\int_{\mathbb R^d} \abs{f(x)}^p \dd{x})^{\frac 1p} < \infty$ for $1 \leq p < \infty$.
+
+\begin{remark}
+ For $g$ measurable s.t. $\int |g| < \infty$, define $\int g(x) \dd{x} = \int \Re(g(x)) \dd{x} + i \int \Im(g(x)) \dd{x}$.
+
+ Note that for some $\alpha \in \mathbb C$ with $\abs{\alpha} = 1$, writing $\alpha f = u + iv$ with $u, v$ real-valued,
+ \[ \abs{\int_{\mathbb R^d} f(x) \dd{x}} = \int_{\mathbb R^d} \alpha f(x) \dd{x} = \int_{\mathbb R^d} u(x) \dd{x} + i \int_{\mathbb R^d} v(x) \dd{x} \]
+ But since the left hand side is real-valued, the $i \int_{\mathbb R^d} v(x) \dd{x}$ term vanishes.
+ So
+ \[ \abs{\int_{\mathbb R^d} f(x) \dd{x}} = \int_{\mathbb R^d} u(x) \dd{x} \leq \int_{\mathbb R^d} \abs{f(x)} \dd{x} \]
+\end{remark}
+
+For $f, g \in L^2$, $\inner{f, g} = \int f(x) \overline{g(x)} \dd{\mu(x)}$ is an inner product on $L^2(\mu)$.
+
+For any $y \in \mathbb{R}^d$,
+\[ \int f(x - y) \dd{x} = \int f(x) \dd{x} = \int f(-x) \dd{x}. \]
+This is by the translation invariance and $x \mapsto -x$ symmetry of $\lambda$, proved in Sheet 3.
+Also, for $a \in \mathbb{R}$ with $a \neq 0$, $\int f(ax) \dd{x} = \frac{1}{\abs{a}^d} \int f(x) \dd{x}$.
+
+\begin{definition}[Fourier Transform]
+ Let $f \in L^1(\mathbb R^d)$.
+ We define the \vocab{Fourier transform} $\hat f$ by
+ \[ \hat f(u) = \int_{\mathbb R^d} f(x) e^{i\inner{u,x}} \dd{x} \]
+ where $u \in \mathbb{R}^d$ and $\inner{u,x} = \sum_{i=1}^d u_i x_i$.
+\end{definition}
+
+\begin{remark}
+ Note that $\abs{\hat f(u)} \leq \norm{f}_1 \quad \forall \; u \in \mathbb{R}^d$, i.e. $\hat{f} \in L^\infty$.
+
+ Also, if $u_n \to u$, then $e^{i\inner{u_n,x}} \to e^{i\inner{u,x}}$, so $f(x) e^{i\inner{u_n,x}} \to f(x) e^{i\inner{u,x}}$; $|f(x) e^{i\inner{u_n,x}}| \leq |f(x)|$ and $f \in L^1$.
+ By the DCT, $\hat f(u_n) \to \hat f(u)$.
+ Moreover, $\lim_{\norm{u} \to \infty} \hat{f}(u) = 0$ (Riemann--Lebesgue lemma, Sheet 3).
+ Thus $\hat f \in C_0(\mathbb{R}^d) = \qty{f \text{ bounded, cts and vanishing at infinity}}$.
+
+ The map $f \mapsto \hat f$ is $1-1$ but not onto: it is injective but not surjective.
+\end{remark}
+
+\begin{definition}[Fourier Transform]
+ Let $\mu$ be a finite Borel measure on $\mathbb R^d$.
+ We define the \vocab{Fourier transform} of the measure for $u \in \mathbb{R}^d$ by
+ \[ \hat\mu(u) = \int_{\mathbb R^d} e^{i\inner{u,x}} \dd{\mu(x)} \]
+\end{definition}
+
+Note that $\abs{\hat \mu(u)} \leq \mu(\mathbb R^d)$, and $\hat \mu$ is a bounded cts fcn on $\mathbb{R}^d$.
+If $\mu$ has a density $f$ (wrt $\lambda$), $\hat\mu = \int_{\mathbb R^d} e^{i\inner{u,x}} f(x) \dd{x} = \hat f$.
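+
+As a concrete illustration (an added worked example, not from the lectures), we can compute a Fourier transform explicitly and check it against the properties above.
+\begin{example}
+ Let $f(x) = e^{-\abs{x}}$ on $\mathbb R$, so $f \in L^1(\mathbb R)$ with $\norm{f}_1 = 2$. Then
+ \[ \hat f(u) = \int_{\mathbb R} e^{-\abs{x}} e^{iux} \dd{x} = \int_0^\infty e^{-x} \qty(e^{iux} + e^{-iux}) \dd{x} = \frac{1}{1 - iu} + \frac{1}{1 + iu} = \frac{2}{1 + u^2} \]
+ Indeed $\abs{\hat f(u)} \leq \norm{f}_1 = 2$, $\hat f$ is continuous, and $\hat f(u) \to 0$ as $\abs{u} \to \infty$, as the Riemann--Lebesgue lemma predicts.
+\end{example}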
+
+\begin{definition}[Characteristic Function]
+ Let $X$ be an $\mathbb R^d$-valued r.v..
+ The \vocab{characteristic function (c.f.)} $\varphi_X$ of $X$ is the Fourier transform of its law $\mu_X = \mathbb{P} \circ X\inv$.
+ So,
+ \begin{align*}
+ \varphi_X(u) = \hat \mu_X(u) = \int e^{i \inner{u, x}} \underbrace{\dd{\mu_X(x)}}_{\dd{(\mathbb{P} \circ X\inv)(x)}}\footnote{Note that $(\nu \circ f\inv)(g) = \nu(g \circ f)$.} = \int_\Omega e^{i\inner{u, X(\omega)}} \dd{\mathbb{P}(\omega)} = \expect{e^{i\inner{u,X}}}.
+ \end{align*}
+\end{definition}
+
+In particular, if $X$ has pdf $f$, then $\varphi_X(u) = \hat{f}(u)$.
+
+\begin{definition}[Fourier inversion formula]
+ Let $f \in L^1(\mathbb R^d)$ s.t. $\hat f \in L^1(\mathbb R^d)$.
+ Then we say that the \vocab{Fourier inversion formula} holds for $f$ if
+ \[ f(x) = \frac{1}{(2\pi)^d} \int_{\mathbb R^d} \hat f(u) e^{-i\inner{u,x}} \dd{u} \]
+ a.e. in $\mathbb R^d$.
+\end{definition}
+
+\begin{definition}[Plancherel identity]
+ Let $f \in L^1(\mathbb R^d) \cap L^2(\mathbb R^d)$.
+ Then the \vocab{Plancherel identity} holds for $f$ if
+ \[ \norm{\hat f}_2 = (2\pi)^{\frac d2} \norm{f}_2 \]
+\end{definition}
+
+We will show that the Fourier inversion formula holds whenever $\hat f \in L^1(\mathbb R^d)$, and the Plancherel identity holds for all $f \in L^1(\mathbb R^d) \cap L^2(\mathbb R^d)$.
+
+\begin{remark}
+ Given the Plancherel identity, the normalised Fourier transform $f \mapsto (2\pi)^{-\frac d2} \hat f$ extends to a linear isometry of $L^2(\mathbb R^d)$, by approximating any function in $L^2(\mathbb R^d)$ by integrable functions.
+\end{remark}
+
+\subsection{Convolutions}
+\begin{definition}
+ Let $f \in L^1(\mathbb R^d)$ and $\nu$ be a probability measure on $\mathbb R^d$.
+ We define their \emph{convolution} $f \ast \nu$ by
+ \[ (f \ast \nu)(x) = \begin{cases}
+ \int_{\mathbb R^d} f(x-y) \dd{\nu(y)} & \text{if } (y \mapsto f(x-y)) \in L^1(\nu) \\
+ 0 & \text{else}
+ \end{cases} \]
+\end{definition}
+
+\begin{remark}
+ If $1 \leq p < \infty$, by Jensen's inequality (as $\nu$ is a probability measure),
+ \begin{align*}
+ \int_{\mathbb R^d} \qty( \int_{\mathbb R^d} \abs{f(x-y)} \dd{\nu(y)} )^p \dd{x} &\leq \int_{\mathbb R^d} \int_{\mathbb R^d} \abs{f(x-y)}^p \dd{\nu(y)} \dd{x} \\
+ &= \int_{\mathbb R^d} \int_{\mathbb R^d} \abs{f(x-y)}^p \dd{x} \dd{\nu(y)} \\
+ &= \int_{\mathbb R^d} \int_{\mathbb R^d} \abs{f(x)}^p \dd{x} \dd{\nu(y)} \\
+ &= \int_{\mathbb R^d} \abs{f(x)}^p \dd{x} \\
+ &= \norm{f}_p^p
+ \end{align*}
+ So for $f \in L^p(\mathbb R^d)$, we have $(y \mapsto f(x-y)) \in L^p(\nu)$ for almost every $x$, and again by Jensen's inequality,
+ \[ \norm{f \ast \nu}_p^p = \int_{\mathbb R^d} \abs{ \int_{\mathbb R^d} f(x-y)\dd{\nu(y)} }^p \dd{x} \leq \int_{\mathbb R^d} \qty( \int_{\mathbb R^d} \abs{f(x-y)} \dd{\nu(y)} )^p \dd{x} \leq \norm{f}_p^p \]
+ Hence $f \mapsto f \ast \nu$ is a contraction on $L^p(\mathbb R^d)$.
+\end{remark}
+
+In the case where $\nu$ has a density $g$ with respect to the Lebesgue measure, we write $f \ast g = f \ast \nu$.
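+
+To make the convolution concrete, here is a small added example (not from the lectures) in $d = 1$.
+\begin{example}
+ Let $\nu$ be the uniform distribution on $[0,1]$, with density $g = 1_{[0,1]}$, and take $f = 1_{[0,1]} \in L^1(\mathbb R)$. Then
+ \[ (f \ast g)(x) = \int_{\mathbb R} 1_{[0,1]}(x-y) 1_{[0,1]}(y) \dd{y} = \lambda\qty([0,1] \cap [x-1,x]) = \begin{cases} x & 0 \leq x \leq 1 \\ 2 - x & 1 \leq x \leq 2 \\ 0 & \text{else} \end{cases} \]
+ which is the triangular density: convolving with $g$ smooths the indicator.
+ Here $\norm{f \ast g}_1 = 1 = \norm{f}_1$, consistent with the contraction property above.
+\end{example}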
+
+\begin{definition}
+ For probability measures $\mu, \nu$ on $\mathbb R^d$, their convolution $\mu \ast \nu$ is a probability measure on $\mathbb R^d$ given by the law of $X + Y$ where $X, Y$ are independent r.v.s with laws $\mu$ and $\nu$, so
+ \begin{align*}
+ (\mu \ast \nu)(A) &= \prob{X+Y \in A} \\
+ &= \int_{\mathbb R^d \times \mathbb R^d} 1_A(x+y) \dd{(\mu \otimes \nu)(x, y)} \\
+ &= \int_{\mathbb R^d} \int_{\mathbb R^d} 1_A(x+y) \dd{\nu(y)} \dd{\mu(x)}
+ \end{align*}
+\end{definition}
+
+If $\mu$ has density $f$ with respect to the Lebesgue measure, $\mu \ast \nu$ has density $f \ast \nu$ with respect to the Lebesgue measure.
+Indeed,
+
+\begin{align*}
+ (\mu \ast \nu)(A) &= \int_{\mathbb R^d} \int_{\mathbb R^d} 1_A(x+y) f(x) \dd{x} \dd{\nu(y)} \\
+ &= \int_{\mathbb R^d} \int_{\mathbb R^d} 1_A(v) f(v-y) \dd{v} \dd{\nu(y)} \\
+ &= \int_{\mathbb R^d} 1_A(v) \int_{\mathbb R^d}f(v-y) \dd{\nu(y)} \dd{v} \\
+ &= \int_{\mathbb R^d} 1_A(v) (f \ast \nu)(v) \dd{v}
+\end{align*}
+
+\begin{proposition}
+ $\widehat{f \ast \nu}(u) = \hat f(u) \hat \nu(u)$.
+\end{proposition}
+
+\begin{proposition}
+ $\widehat{\mu \ast \nu}(u) = \expect{e^{i\inner{u,X+Y}}} = \expect{e^{i\inner{u,X}}e^{i\inner{u,Y}}} = \hat \mu(u) \hat \nu(u)$.
+\end{proposition}
+
+\subsection{Fourier transforms of Gaussians}
+\begin{definition}
+ The \emph{normal distribution} $N(0,t)$ is given by the probability density function
+ \[ g_t(x) = \frac{1}{\sqrt{2\pi t}} e^{-\frac{x^2}{2t}} \]
+\end{definition}
+If $\varphi_X$ is the characteristic function of a standard normal r.v., then differentiating under the integral sign (justified by the DCT) and integrating by parts,
+\begin{align*}
+ \dv{u} \varphi_X(u) &= \dv{u} \int_{\mathbb R} e^{iux} g_1(x) \dd{x} \\
+ &= \int_{\mathbb R} g_1(x) \dv{u} e^{iux} \dd{x} \\
+ &= \frac{i}{\sqrt{2\pi}} \int_{\mathbb R} \underbrace{e^{iux}}_{v} \underbrace{x e^{-\frac{x^2}{2}}}_{w'} \dd{x} \\
+ &= \frac{i^2}{\sqrt{2\pi}} \int_{\mathbb R} u e^{iux} e^{-\frac{x^2}{2}} \dd{x} \\
+ &= -u \varphi_X(u)
+\end{align*}
+Hence,
+\[ \dv{u}\qty(e^{\frac{u^2}{2}} \varphi_X(u)) = ue^{\frac{u^2}{2}} \varphi_X(u) - e^{\frac{u^2}{2}} u \varphi_X(u) = 0 \]
+In particular, $\varphi_X(u) = \varphi_X(0) e^{-\frac{u^2}{2}} = e^{-\frac{u^2}{2}}$.
+In other words, $\hat g_1(u) = \sqrt{2\pi} g_1(u)$.
+
+In $\mathbb R^d$, consider a Gaussian random vector $Z = (Z_1, \dots, Z_d)$ with independent and identically distributed entries $Z_i \sim N(0,1)$.
+Then, the joint probability density function of $\sqrt{t}Z$ is
+\[ g_t(x) = \prod_{j=1}^d \frac{1}{\sqrt{2\pi t}} e^{-\frac{x_j^2}{2t}} = (2\pi t)^{-\frac{d}{2}} e^{-\frac{\norm{x}^2}{2t}} \]
+The Fourier transform of $g_t$ is
+\[ \hat g_t(u) = \expect{e^{i\inner{u,\sqrt{t}Z}}} = \expect{\prod_{j=1}^d e^{iu_j \sqrt{t} Z_j}} = \prod_{j=1}^d \expect{e^{iu_j \sqrt{t} Z_j}} = \prod_{j=1}^d e^{-u_j^2 \frac{t}{2}} = e^{-\frac{\norm{u}^2 t}{2}} \]
+which implies that in general, $\hat g_t(u) = (2\pi)^{\frac{d}{2}} t^{-\frac{d}{2}} g_{\frac{1}{t}}(u)$.
+Taking the Fourier transform with respect to $u$, $\hhat g_t = (2\pi)^d g_t$, and since $g_t(-x) = g_t(x)$ and the Lebesgue measure is translation invariant, we have
+\[ g_t(x) = \frac{1}{(2\pi)^d} \hhat g_t(x) = \frac{1}{(2\pi)^d} \int_{\mathbb R^d} e^{-i\inner{u,x}} \hat g_t(u) \dd{u} \]
+so the Fourier inversion theorem holds for the Gaussian densities $g_t$.
+\begin{definition}
+ We say that a function on $\mathbb R^d$ is a \emph{Gaussian convolution} if it is of the form
+ \[ f \ast g_t(x) = \int_{\mathbb R^d} f(x-y) g_t(y) \dd{y} \]
+ where $x \in \mathbb R^d, t > 0, f \in L^1(\mathbb R^d)$.
+\end{definition}
+We can show that $f \ast g_t$ is continuous on $\mathbb R^d$, and $\norm{f \ast g_t}_1 \leq \norm{f}_1$.
+Note that $\widehat{f \ast g_t}(u) = \hat f(u) e^{-\frac{\norm{u}^2 t}{2}}$, so $\norm{\widehat{f \ast g_t}}_\infty \leq \norm{f}_1$, giving $\norm{\widehat{f \ast g_t}}_1 \leq \norm{f}_1 (2\pi)^{\frac{d}{2}} t^{-\frac{d}{2}} < \infty$.
+\begin{lemma}
+ The Fourier inversion theorem holds for all Gaussian convolutions.
+\end{lemma}
+\begin{proof}
+ We can use the Fourier inversion theorem for $g_t(y)$ to see that
+ \begin{align*}
+ (2\pi)^d f \ast g_t(x) &= (2\pi)^d \int_{\mathbb R^d} f(x-y) g_t(y) \dd{y} \\
+ &= \int_{\mathbb R^d} f(x-y) \int_{\mathbb R^d} e^{-i\inner{u,y}} \hat g_t(u) \dd{u} \dd{y} \\
+ &= \int_{\mathbb R^d} e^{-i\inner{u,x}} \int_{\mathbb R^d} f(x-y) e^{i\inner{u,x-y}} \dd{y} \hat g_t(u) \dd{u} \\
+ &= \int_{\mathbb R^d} e^{-i\inner{u,x}} \int_{\mathbb R^d} f(z) e^{i\inner{u,z}} \dd{z} \hat g_t(u) \dd{u} \\
+ &= \int_{\mathbb R^d} e^{-i\inner{u,x}} \hat f(u) \hat g_t(u) \dd{u} \\
+ &= \int_{\mathbb R^d} e^{-i\inner{u,x}} \widehat{f \ast g_t}(u) \dd{u}
+ \end{align*}
+\end{proof}
+\begin{remark}
+ If $\mu$ is a finite measure, then $\mu \ast g_t = \mu \ast g_{\frac{t}{2}} \ast g_{\frac{t}{2}}$ with $\mu \ast g_{\frac{t}{2}} \in L^1$, so it is also a Gaussian convolution.
+\end{remark}
+\begin{lemma}[Gaussian convolutions are dense in $L^p$]
+ Let $f \in L^p$ where $1 \leq p < \infty$.
+ Then $\norm{f \ast g_t - f}_p \to 0$ as $t \to 0$.
+\end{lemma}
+\begin{proof}
+ One can easily show that the space $C_c(\mathbb R^d)$ of continuous functions of compact support is dense in $L^p$.
+ Hence, for all $\varepsilon > 0$, there exists $h \in C_c(\mathbb R^d)$ s.t. $\norm{f - h}_p < \frac{\varepsilon}{3}$, and by properties of the convolution, we also obtain
+ \[ \norm{f \ast g_t - h \ast g_t}_p = \norm{(f - h) \ast g_t}_p \leq \norm{f - h}_p < \frac{\varepsilon}{3} \]
+ So
+ \[ \norm{f \ast g_t - f}_p \leq \norm{f \ast g_t - h \ast g_t}_p + \norm{h \ast g_t - h}_p + \norm{h - f}_p < \frac{2\varepsilon}{3} + \norm{h \ast g_t - h}_p \]
+ so it suffices to prove the result for $f = h \in C_c(\mathbb R^d)$.
+ We define a new map
+ \[ e(y) = \int_{\mathbb R^d} \abs{h(x-y) - h(x)}^p \dd{x} \]
+ Since $h$ is bounded on its bounded support, the dominated convergence theorem implies that $e$ is continuous at $y = 0$.
+ Note that $e(y) \leq 2^{p+1} \norm{h}_p^p$.
+ Hence, by Jensen's inequality,
+ \begin{align*}
+ \norm{h \ast g_t - h}_p^p &= \int_{\mathbb R^d} \abs{ \int_{\mathbb R^d} (h(x-y) - h(x)) g_t(y) \dd{y} }^p \dd{x} \\
+ &\leq \int_{\mathbb R^d} \int_{\mathbb R^d} \abs{h(x-y) - h(x)}^p \dd{x} g_t(y) \dd{y} \\
+ &= \int_{\mathbb R^d} e(y) g_t(y) \dd{y} \\
+ &= \int_{\mathbb R^d} \underbrace{e(\sqrt{t} z)}_{\to e(0) = 0 \text{ as } t \to 0} g_1(z) \dd{z} \\
+ &\to 0
+ \end{align*}
+ by dominated convergence, since $e$ is bounded.
+\end{proof}
+\begin{theorem}[Fourier inversion]
+ Let $f \in L^1(\mathbb R^d)$ be s.t. $\hat f \in L^1(\mathbb R^d)$.
+ Then for almost all $x \in \mathbb R^d$,
+ \[ f(x) = \frac{1}{(2\pi)^d} \int_{\mathbb R^d} e^{-i\inner{u,x}} \hat f(u) \dd{u} \]
+\end{theorem}
+\begin{remark}
+ This proves that the Fourier transform is injective; $\hat f = \hat g$ implies $\widehat{f - g} = 0$, so by Fourier inversion, $f = g$ almost everywhere.
+ Since the right hand side is continuous in $x$, the identity holds everywhere on $\mathbb R^d$ for the (unique) continuous representative of $f$ in its equivalence class.
+\end{remark}
+\begin{proof}
+ The Fourier inversion theorem holds for the following Gaussian convolution for all $t$:
+ \[ f \ast g_t(x) = \frac{1}{(2\pi)^d} \int_{\mathbb R^d} e^{-i\inner{u,x}} \hat f(u) e^{\frac{-\abs{u}^2 t}{2}} \dd{u} = f_t(x) \]
+ Now, since Gaussian convolutions are dense, $f \ast g_t \to f$ in $L^1$, so $f \ast g_t \to f$ in measure by Markov's inequality.
+ Hence, along a subsequence, $f \ast g_{t_k} \to f$ almost everywhere.
+ On the other hand, by the dominated convergence theorem with dominating function $\abs{\hat f}$, the right hand side converges to $\frac{1}{(2\pi)^d} \int_{\mathbb R^d} e^{-i\inner{u,x}} \hat f(u) \dd{u}$.
+ So this is equal to $\lim_{t_k \to 0} f_{t_k}$ almost everywhere by uniqueness of limits.
+\end{proof}
+\begin{theorem}[Plancherel]
+ Let $f \in L^1(\mathbb R^d) \cap L^2(\mathbb R^d)$.
+ Then $\norm{f}_2 = (2\pi)^{-\frac{d}{2}} \norm{\hat f}_2$.
+\end{theorem}
+\begin{remark}
+ By the polarisation identity, $\inner{f, g} = (2\pi)^{-d} \inner{\hat f, \hat g}$.
+\end{remark}
+\begin{proof}
+ Initially, we assume $\hat f \in L^1$.
+ In this case, $f, \hat f \in L^\infty$, and $(x,u) \mapsto f(x)\hat f(u)$ is integrable for the product Lebesgue measure $\dd{x} \otimes \dd{u}$ on $\mathbb R^d \times \mathbb R^d$, so Fubini's theorem applies.
+ \begin{align*}
+ (2\pi)^d \norm{f}_2^2 &= (2\pi)^d \int_{\mathbb R^d} f(x) \overline{f(x)} \dd{x} \\
+ &= \int_{\mathbb R^d} \qty(\int_{\mathbb R^d} e^{-i\inner{u,x}} \hat f(u) \dd{u}) \overline{f(x)} \dd{x} \\
+ &= \int_{\mathbb R^d} \hat f(u) \overline{\int_{\mathbb R^d} e^{i\inner{u,x}} f(x) \dd{x}} \dd{u} \\
+ &= \int_{\mathbb R^d} \hat f(u) \overline{\hat f(u)} \dd{u} \\
+ &= \norm{\hat f}_2^2
+ \end{align*}
+ To extend this result to general $f$, we take the Gaussian convolutions $f \ast g_t = f_t$ s.t. $f_t \to f$ in $L^2$.
+ By the continuity of the norm, $\norm{f_t}_2 \to \norm{f}_2$.
+ Since $\abs{\hat f(u) e^{-\frac{\abs{u}^2 t}{2}}}^2$ increases to $\abs{\hat f(u)}^2$ as $t \downarrow 0$, we have by monotone convergence that $\norm{\hat f_t}_2^2 \uparrow \norm{\hat f}_2^2$.
+ Therefore, since the Plancherel identity holds for the $f_t$,
+ \[ \norm{f}_2^2 = \lim_{t \to 0} \norm{f_t}_2^2 = \lim_{t \to 0} (2\pi)^{-d} \norm{\hat f_t}_2^2 = (2\pi)^{-d} \norm{\hat f}_2^2 \]
+\end{proof}
+\begin{remark}
+ Since $L^1 \cap L^2$ is dense in $L^2$, we can extend the linear operator $F_0(f) = (2\pi)^{-\frac{d}{2}} \hat f$ to $L^2$ by continuity to a linear isometry $F \colon L^2 \to L^2$ known as the \emph{Fourier--Plancherel transform}.
+ One can show that $F$ is surjective with inverse $F^{-1} \colon L^2 \to L^2$.
+\end{remark}
+\begin{example}
+ Consider the Dirac measure $\delta_0$ on $\mathbb R$, so $\hat \delta_0(u) = \int_{\mathbb R} e^{iux} \dd{\delta_0(x)} = 1$.
+ But $1 \notin L^1(\mathbb R)$, so the inverse Fourier transform $\frac{1}{2\pi} \int_{\mathbb R} e^{-iux} \dd{u}$ is not defined as a Lebesgue integral.
+\end{example}
+\begin{theorem}
+ Let $X$ be a random vector in $\mathbb R^d$ with law $\mu_X$.
+ Then the characteristic function $\varphi_X = \hat \mu_X$ uniquely determines $\mu_X$.
+ In addition, if $\varphi_X \in L^1$, then $\mu_X$ has a probability density function $f_X$ which can be computed almost everywhere by $\frac{1}{(2\pi)^d} \int_{\mathbb R^d} e^{-i\inner{u,x}} \varphi_X(u) \dd{u}$.
+\end{theorem}
+\begin{proof}
+ Let $Z = (Z_1, \dots, Z_d)$ be a vector of independent and identically distributed r.v.s, independent of $X$, with $Z_j \sim N(0,1)$.
+ Then $\sqrt{t} Z$ has probability density function $g_t$.
+ Then $X + \sqrt{t} Z$ has probability density function $f_t = \mu_X \ast g_t$.
+ This is a Gaussian convolution since $\mu_X \ast g_t = \mu_X \ast g_{\frac t 2} \ast g_{\frac t 2}$.
+ Hence,
+ \[ f_t(x) = \frac{1}{(2\pi)^d} \int_{\mathbb R^d} e^{-i\inner{u,x}} \varphi_X(u) e^{-\frac{\abs{u}^2 t}{2}} \dd{u} \]
+ which is uniquely determined by $\varphi_X$.
+ We show on an example sheet that two Borel probability measures $\mu, \nu$ on $\mathbb R^d$ coincide if and only if $\mu(g) = \nu(g)$ for all $g \colon \mathbb R^d \to \mathbb R$ that are bounded, continuous, and of compact support.
+ Now,
+ \[ \int_{\mathbb R^d} g(x) f_t(x) \dd{x} = \expect{\underbrace{g(X + \sqrt{t} Z)}_{\to g(X) \text{ a.s. as } t \to 0}} \]
+ Since $\abs{g(X + \sqrt{t}Z)} \leq \norm{g}_\infty < \infty$, by the bounded convergence theorem, this converges to $\expect{g(X)} = \int_{\mathbb R^d} g(x) \dd{\mu_X(x)}$.
+ So by uniqueness of limits, $\varphi_X$ determines $\mu_X$.
+
+ If $\varphi_X \in L^1$, by dominated convergence, $f_t(x)$ converges everywhere as $t \to 0$ to $f_X(x) = \frac{1}{(2\pi)^d} \int_{\mathbb R^d} e^{-i\inner{u,x}} \varphi_X(u) \dd{u}$.
+ In particular, since $\mu_X \ast g_t \geq 0$, the limit $f_X$ is also non-negative on $\mathbb R^d$.
+ Then, for any bounded continuous function of compact support $g \in C^b_c(\mathbb R^d)$,
+ \[ \int_{\mathbb R^d} g(x) f_X(x) \dd{x} = \int_{\mathbb R^d} g(x) \lim_{t \to 0} \underbrace{f_t(x)}_{\abs{f_t} \leq (2\pi)^{-d} \norm{\varphi_X}_1} \dd{x} = \lim_{t \to 0} \int_{\mathbb R^d} g(x) f_t(x) \dd{x} = \int_{\mathbb R^d} g(x) \dd{\mu_X(x)} \]
+ by the dominated convergence theorem, since $g$ has compact support.
+ So $\mu_X$ has density $f_X$.
+\end{proof}
+\begin{definition}
+ A sequence $(\mu_n)_{n \in \mathbb N}$ of Borel probability measures on $\mathbb R^d$ \emph{converges weakly} to a Borel probability measure $\mu$ if $\mu_n(g) \to \mu(g)$ for all $g \colon \mathbb R^d \to \mathbb R$ bounded and continuous.
+ If $(X_n)_{n \in \mathbb N}, X$ are random vectors with laws $(\mu_{X_n}), \mu_X$ s.t. $\mu_{X_n}$ converges weakly to $\mu_X$, we say $(X_n)$ converges weakly to $X$.
+\end{definition}
+\begin{remark}
+ If $d = 1$, weak convergence is equivalent to convergence in distribution; this is proven on an example sheet.
+ One can also show that convergence of $\mu_n(g)$ to $\mu(g)$ for all $g \in C_c^\infty(\mathbb R^d)$ suffices to show weak convergence, where $C_c^\infty(\mathbb R^d)$ is the space of smooth functions of compact support.
+ This is equivalent to the notion of weak-${}^\star$ convergence on the function space $C_b(\mathbb R^d)$.
+\end{remark}
+\begin{theorem}[L\'evy's continuity theorem]
+ Let $X_n, X$ be random vectors in $\mathbb R^d$, s.t. $\varphi_{X_n}(u) \to \varphi_X(u)$ for all $u$, as $n \to \infty$.
+ Then $\mu_{X_n} \to \mu_X$ weakly.
+\end{theorem}
+\begin{remark}
+ The converse holds by definition of weak convergence, testing against the complex exponentials in the Fourier transform.
+\end{remark}
+\begin{proof}
+ Let $Z = (Z_1, \dots, Z_d)$ be a vector of standard normal r.v.s, independent from each other, $X_n$, and $X$.
+ Let $g \in C_c^\infty(\mathbb R^d)$.
+ Then $g \in L^1(\mathbb R^d)$, and is Lipschitz by the mean value theorem, as its first derivative is bounded.
+ Write $\abs{g(x) - g(y)} \leq \norm{g}_{\mathrm{Lip}} \abs{x - y}$.
+ Let $\varepsilon > 0$.
+ Let $t > 0$ be sufficiently small s.t. $\sqrt t \norm{g}_{\mathrm{Lip}} \expect{\abs{Z}} < \frac{\varepsilon}{3}$.
+ Then,
+ \begin{align*}
+ \abs{\mu_{X_n}(g) - \mu_X(g)} &= \abs{\expect{g(X_n)} - \expect{g(X)}} \\
+ &\leq \expect{\abs{g(X_n) - g(X_n + \sqrt t Z)}} + \expect{\abs{g(X) - g(X + \sqrt t Z)}} \\
+ &+ \abs{\expect{g(X_n + \sqrt t Z) - g(X + \sqrt t Z)}} \\
+ &\leq 2\norm{g}_{\mathrm{Lip}}\sqrt t \expect{\abs{Z}} + \abs{\expect{g(X_n + \sqrt t Z) - g(X + \sqrt t Z)}} \\
+ &\leq \frac{2\varepsilon}{3} + \abs{\expect{g(X_n + \sqrt t Z) - g(X + \sqrt t Z)}}
+ \end{align*}
+ We show that the remaining term can be made less than $\frac{\varepsilon}{3}$ for sufficiently large $n$.
+ Let $f_{t,n}(x) = g_t \ast \mu_{X_n}$.
+ Then, by Fourier inversion for Gaussian convolutions,
+ \begin{align*}
+ \expect{g(X_n + \sqrt t Z)} &= \int_{\mathbb R^d} g(x) f_{t,n}(x) \dd{x} \\
+ &= \frac{1}{(2\pi)^d} \int_{\mathbb R^d} g(x) \int_{\mathbb R^d} e^{-i\inner{u,x}} \varphi_{X_n}(u) e^{-\frac{\abs{u}^2 t}{2}} \dd{u} \dd{x}
+ \end{align*}
+ Since characteristic functions are bounded by 1, we can apply the dominated convergence theorem with dominating function $\abs{g(x)} e^{-\frac{\abs{u}^2 t}{2}}$ to find
+ \begin{align*}
+ \expect{g(X_n + \sqrt t Z)} &\to \frac{1}{(2\pi)^d} \int_{\mathbb R^d} g(x) \int_{\mathbb R^d} e^{-i\inner{u,x}} \varphi_X(u) e^{-\frac{\abs{u}^2 t}{2}} \dd{u} \dd{x} \\
+ &= \int_{\mathbb R^d} g(x) f_t(x) \dd{x} \\
+ &= \expect{g(X + \sqrt t Z)}
+ \end{align*}
+ where $f_t = g_t \ast \mu_X$.
+ So as $n \to \infty$, the difference between these two terms can be made less than $\frac{\varepsilon}{3}$ as required.
+\end{proof}
+\begin{theorem}[Central limit theorem]
+ Let $X_1, X_2, \dots$ be independent and identically distributed r.v.s with $\expect{X_i} = 0$ and $\Var{X_i} = 1$.
+ Let $S_n = \sum_{i=1}^n X_i$.
+ Then
+ \[ \frac{1}{\sqrt{n}} S_n \xrightarrow{\text{weakly}} Z \sim N(0,1) \]
+ In particular,
+ \[ \prob{\frac{1}{\sqrt{n}} S_n \leq x} \to \prob{Z \leq x} \]
+\end{theorem}
+\begin{proof}
+ Let $X = X_1$.
+ The characteristic function $\varphi(u) = \varphi_X(u) = \expect{e^{iuX}}$ satisfies $\varphi(0) = 1$, $\varphi'(u) = i \expect{X e^{iuX}}$, $\varphi''(u) = i^2 \expect{X^2 e^{iuX}}$.
+ We can find $\varphi'(0) = i\expect{X} = 0$ and $\varphi''(0) = -\expect{X^2} = -\Var X = -1$.
+ By Taylor's theorem, $\varphi(v) = 1 - \frac{v^2}{2} + o(v^2)$ as $v \to 0$.
+ Now, denoting $\varphi_n(u) = \varphi_{\frac{1}{\sqrt n} S_n}(u)$, we can write
+ \begin{align*}
+ \varphi_n(u) &= \expect{e^{i\frac{u}{\sqrt n} (X_1 + \dots + X_n)}} \\
+ &= \prod_{j=1}^n \expect{e^{i\frac{u}{\sqrt n} X_j}} \\
+ &= \qty[\varphi\qty(\frac{u}{\sqrt n})]^n \\
+ &= \qty[1 - \frac{u^2}{2n} + o\qty(\frac{1}{n})]^n
+ \end{align*}
+ The complex logarithm satisfies $\log(1 + z) = z + o(z)$, so by taking logarithms, we find
+ \[ \log \varphi_n(u) = n \log\qty(1 - \frac{u^2}{2n} + o\qty(\frac{1}{n})) = -\frac{u^2}{2} + o(1) \to -\frac{u^2}{2} \]
+ Hence, $\varphi_n(u) \to e^{-\frac{\abs{u}^2}{2}} = \varphi_Z(u)$.
+ So by L\'evy's continuity theorem, the result follows.
+\end{proof}
+\begin{remark}
+ This theorem extends to $\mathbb R^d$ by using the next proposition, using the fact that $X_n \to X$ weakly in $\mathbb R^d$ if and only if $\inner{X_n, v} \to \inner{X, v}$ weakly in $\mathbb R$ for all $v \in \mathbb R^d$.
+\end{remark}
+\begin{definition}
+ A r.v. $X$ in $\mathbb R^d$ is called a \emph{Gaussian vector} if $\inner{X, v}$ is Gaussian for each $v \in \mathbb R^d$.
+\end{definition}
+\begin{proposition}
+ Let $X$ be a Gaussian vector in $\mathbb R^d$.
+ Then $Z = AX + b$ is a Gaussian vector in $\mathbb R^m$ where $A$ is an $m \times d$ matrix and $b \in \mathbb R^m$.
+ Also, $X \in L^2$, and $\mu = \expect{X}$ and $V = (\Cov{X_i, X_j})_{i,j}$ exist and determine $\mu_X$.
+ The characteristic function is
+ \[ \varphi_X(u) = e^{i\inner{\mu,u} - \frac{\inner{u,Vu}}{2}} \]
+ If $V$ is invertible, then $\mu_X$ has a probability density function
+ \[ f_X(x) = (2\pi)^{-\frac{d}{2}} (\det V)^{-\frac{1}{2}} \exp{-\frac{1}{2}\inner{x-\mu, V^{-1}(x - \mu)}} \]
+ Subvectors $X_{(1)}, X_{(2)}$ of $X$ are independent if and only if $\Cov{X_{(1)}, X_{(2)}} = 0$.
+\end{proposition}
+\begin{proposition}
+ Let $X_n \to X$ weakly in $\mathbb R^d$ as $n \to \infty$.
+ Then,
+ \begin{enumerate}
+ \item if $h \colon \mathbb R^d \to \mathbb R^k$ is continuous, then $h(X_n) \to h(X)$ weakly;
+ \item if $\abs{X_n - Y_n} \to 0$ in probability, then $Y_n \to X$ weakly;
+ \item if $Y_n \to c$ in probability where $c$ is constant on $\Omega$, then $(X_n, Y_n) \to (X, c)$ weakly in $\mathbb R^d \times \mathbb R^d$.
+ \end{enumerate}
+\end{proposition}
+\begin{remark}
+ Combining parts (iii) and (i), $X_n + Y_n \to X + c$ weakly if $Y_n \to c$ in probability.
+ If $d = 1$, then in addition $X_n Y_n \to c X$ weakly.
+\end{remark}
+\begin{proof}
+ \emph{Part (i).}
+ This follows from the fact that $g \circ h$ is bounded and continuous for any bounded continuous test function $g$.
+
+ \emph{Part (ii).}
+ Let $g \colon \mathbb R^d \to \mathbb R$ be bounded and Lipschitz continuous (testing against bounded Lipschitz functions suffices for weak convergence).
+ Then
+ \[ \abs{\expect{g(Y_n)} - \expect{g(X)}} \leq \underbrace{\abs{\expect{g(X_n)} - \expect{g(X)}}}_{< \frac{\varepsilon}{3}} + \expect{\abs{g(X_n) - g(Y_n)}} \]
+ where the bound on $\expect{g(X_n)} - \expect{g(X)}$ holds for sufficiently large $n$.
+ Then the remaining term is upper bounded by
+ \[ \expect{\abs{g(X_n) - g(Y_n)} \qty(1_{\qty{\abs{X_n - Y_n} \leq \frac{\varepsilon}{3\norm{g}_{\mathrm{Lip}}}}} + 1_{\qty{\abs{X_n - Y_n} > \frac{\varepsilon}{3\norm{g}_{\mathrm{Lip}}}}} )} \]
+ \[ \leq \norm{g}_{\mathrm{Lip}} \frac{\varepsilon}{3\norm{g}_{\mathrm{Lip}}} + 2\norm{g}_\infty \prob{\abs{X_n - Y_n} > \frac{\varepsilon}{3\norm{g}_{\mathrm{Lip}}}} < \frac{2\varepsilon}{3} \]
+ for sufficiently large $n$.
+
+ \emph{Part (iii).}
+ $\abs{(X_n, c) - (X_n, Y_n)} = \abs{Y_n - c} \to 0$ in probability.
+ Also, $\expect{g(X_n, c)} \to \expect{g(X, c)}$ for all bounded continuous maps $g \colon \mathbb R^d \times \mathbb R^d \to \mathbb R$, so $(X_n, c) \to (X, c)$ weakly.
+ Hence, by (ii), $(X_n, Y_n) \to (X, c)$ weakly.
+\end{proof} diff --git a/ProbAndMeasure/probmeasure.pdf b/ProbAndMeasure/probmeasure.pdf index 352c35d..34f37f1 100644 Binary files a/ProbAndMeasure/probmeasure.pdf and b/ProbAndMeasure/probmeasure.pdf differ diff --git a/ProbAndMeasure/probmeasure.tex b/ProbAndMeasure/probmeasure.tex index 5a70899..bdf3d50 100644 --- a/ProbAndMeasure/probmeasure.tex +++ b/ProbAndMeasure/probmeasure.tex @@ -15,7 +15,11 @@ \newcommand{\expect}[1]{\mathbb{E}\left[{#1}\right]} % \DeclarePairedDelimiter\ceil{\lceil}{\rceil} \DeclarePairedDelimiter\floor{\lfloor}{\rfloor} +% \renewcommand{\norm}[1]{\left \lVert #1 \right \rVert} +\newcommand{\wildcard}{{}\cdot{}} +\DeclareMathOperator*{\esssup}{ess\ sup} % \DeclarePairedDelimiter\Brackets{[\![}{]\!]} +\newcommand{\hhat}[1]{\hat{\hat{#1}}} % \includeonly{02_measurable_functions.tex} @@ -63,4 +67,6 @@ \include{02_measurable_functions.tex} \include{03_integration.tex} \include{04_product_measures.tex} + \include{05_function_spaces_and_norms.tex} + \include{06_fourier_analysis.tex} \end{document} \ No newline at end of file diff --git a/preamble.tex b/preamble.tex index 65d5c8f..8cf54b4 100644 --- a/preamble.tex +++ b/preamble.tex @@ -261,7 +261,7 @@ \newtheorem{remark}{Remark} \newtheorem*{note}{Note} -% Fancy Section and Chapter Heads +% Fancy Section and Chapter Heads %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \makeatletter @@ -324,7 +324,7 @@ \usepackage{cancel} \newcommand\hcancel[2][black]{\setbox0=\hbox{$#2$}% -\rlap{\raisebox{.45\ht0}{\textcolor{#1}{\rule{\wd0}{1pt}}}}#2} +\rlap{\raisebox{.45\ht0}{\textcolor{#1}{\rule{\wd0}{1pt}}}}#2} \newcommand\Ccancel[2][black]{ \let\OldcancelColor\CancelColor