diff --git a/Graph_Theory.pdf b/Graph_Theory.pdf
index b2aa53d..1dbd75f 100644
Binary files a/Graph_Theory.pdf and b/Graph_Theory.pdf differ
diff --git a/ProbAndMeasure/04_product_measures.tex b/ProbAndMeasure/04_product_measures.tex
index 4608bb5..6a30564 100644
--- a/ProbAndMeasure/04_product_measures.tex
+++ b/ProbAndMeasure/04_product_measures.tex
@@ -107,7 +107,7 @@ \subsection{Fubini's theorem}
 \item Let $f \colon E \to \mathbb R$ be a $\mu$-integrable function (on the product measure).
 Let
 \[ A_1 = \qty{x_1 \in E_1 : \int_{E_2} \abs{f(x_1,x_2)} \dd{\mu_2(x_2)} < \infty}. \]
- Define $f_1 : E_1 \to \mathbb{R}$ by $f_1(x_1) = \int_{E_2} f(x_1,x_2) \dd{\mu_2(x_2)}$ on $A_1$ and zero elsewhere. \\
+ Define $f_1 : E_1 \to \mathbb{R}$ by $f_1(x_1) = \int_{E_2} f(x_1,x_2) \dd{\mu_2(x_2)}$ on $A_1$ and 0 elsewhere. \\
 Then $\mu_1(A_1^c) = 0$, $f_1$ is $\mu_1$-integrable and $\mu(f) = \mu_1(f_1) = \mu_1(f_1 1_{A_1})$, and defining $A_2$ symmetrically, $\mu(f) = \mu_2(f_2) = \mu_2(f_2 1_{A_2})$.
 \end{enumerate}
 \end{theorem}
@@ -129,8 +129,9 @@ \subsection{Fubini's theorem}
 So $f_1$ is $\mu_1$-integrable.
 We have $\mu_1(A_1^c) = 0$, otherwise $\mu_1(h) \geq \mu_1(h 1_{A_1^c}) = \infty$ \Lightning.
- Note that $f_1^\pm = \int_{E_2} f^\pm(x_1,x_2) \dd{\mu_2(x_2)}$, and $\mu(f_1) = \mu_1(f_1^+) - \mu_1(f_1^-)$.
- Hence, by the first part, $\mu(f) = \mu(f^+) - \mu(f^-) = \mu_1(f_1^+) - \mu_1(f_1^-) = \mu_1(f_1)$ as required.
+ Setting $f_1^\pm = \int_{E_2} f^\pm(x_1,x_2) \dd{\mu_2(x_2)}$, we see that $f_1 = (f_1^+ - f_1^-) 1_{A_1}$.
+ Also, by the first part, $\mu_1(f_1^+) = \mu(f^+) < \infty$ and $\mu_1(f_1^-) = \mu(f^-) < \infty$.
+ Hence, $\mu(f) =\footnote{As $f$ is integrable.} \mu(f^+) - \mu(f^-) = \mu_1(f_1^+) - \mu_1(f_1^-) =\footnote{As $f_1$ is integrable, due to $\mu_1(A_1^c) = 0$.} \mu_1(f_1)$ as required.
 \end{proof}
 
 \begin{remark}
@@ -138,18 +139,19 @@ \subsection{Fubini's theorem}
 Let $(E_i, \mathcal E_i, \mu_i)$ be measure spaces with $\sigma$-finite measures.
 Note that $(\mathcal E_1 \otimes \mathcal E_2) \otimes \mathcal E_3 = \mathcal E_1 \otimes (\mathcal E_2 \otimes \mathcal E_3)$, by a $\pi$-system argument using Dynkin's lemma.
- So we can iterate the construction of the product measure to obtain a measure $\mu_1 \otimes \dots \mu_n$, which is a unique measure on $\qty(\prod_{i=1}^n E_i \bigotimes_{i=1}^n \mathcal E_i)$ with the property that the measure of a hypercube $\mu(A_1 \times A_n)$ is the product of the measures of its sides $\mu_i(A_i)$.
+ So we can iterate the construction of the product measure to obtain a measure $\mu_1 \otimes \dots \otimes \mu_n$\footnote{This is associative.}, which is the unique measure on $\qty(\prod_{i=1}^n E_i, \bigotimes_{i=1}^n \mathcal E_i)$ with the property that the measure of a hypercube $\mu(A_1 \times \dots \times A_n)$ is the product of the measures of its sides $\mu_i(A_i)$.
 In particular, we have constructed the Lebesgue measure $\mu^n = \bigotimes_{i=1}^n \mu$ on $\mathbb R^n$.
- Applying Fubini's theorem, for functions $f$ that are either nonnegative and measurable or $\mu^n$-integrable, we have
+ Applying Fubini's theorem, for functions $f$ that are either non-negative and measurable or $\mu^n$-integrable, we have
 \[ \int_{\mathbb R^n} f \dd{\mu^n} = \idotsint_{\mathbb R \dots \mathbb R} f(x_1, \dots, x_n) \dd{\mu(x_1)} \dots \dd{\mu(x_n)} \]
 \end{remark}
 
 \subsection{Product probability spaces and independence}
 \begin{proposition}
- Let $(\Omega, \mathcal F, \mathbb P)$, and $(E, \mathcal E) = \qty(\prod_{i=1}^n E_i, \bigotimes_{i=1}^n \mathcal E_i)$.
- Let $X \colon (\Omega, \mathcal F) \to (E, \mathcal E)$ be a measurable function, and define $X(\omega) = (X_1(\omega), X_2(\omega), \dots, X_n(\omega))$.
- Then the following are equivalent.
+ Let $X_1, \dots, X_n$ be r.v.s, $X_i : (\Omega, \mathcal F, \mathbb P) \to (E_i, \mathcal{E}_i)$.
+ Set $(E, \mathcal E) = \qty(\prod_{i=1}^n E_i, \bigotimes_{i=1}^n \mathcal E_i)$.
+ Consider $X \colon (\Omega, \mathcal F, \mathbb{P}) \to (E, \mathcal E)$ given by $X(\omega) = (X_1(\omega), X_2(\omega), \dots, X_n(\omega))$.
+ Then $X$ is $\mathcal{E}$-measurable and the following are equivalent.
 \begin{enumerate}
 \item $X_1, \dots, X_n$ are independent random variables;
 \item $\mu_X = \bigotimes_{i=1}^n \mu_{X_i}$;
@@ -157,24 +159,33 @@ \subsection{Product probability spaces and independence}
 \end{enumerate}
 \end{proposition}
 \begin{proof}
- \emph{(i) implies (ii).}
- Consider the $\pi$-system $\mathcal A$ of rectangles $A = \prod_{i=1}^n A_i$ for $A_i \in \mathcal E_i$.
- Since $\mu_X$ is an image measure,
- Then
- \[ \mu_X(A_1 \times \dots \times A_n) = \prob{X_1 \in A_1, \dots, X_n \in A_n} = \prob{X_1} \dots \prob{A_n} = \prod_{i=1}^n \mu_{X_i}(A_i) \]
- So by uniqueness, the result follows.
+ To show $X$ is measurable, it suffices to check $X\inv(A_1 \times \dots \times A_n) \in \mathcal{F}$ for $A_i \in \mathcal{E}_i \ \forall \; i$, as these rectangles form a $\pi$-system generating $\mathcal{E}$.
+ \begin{align*}
+ X\inv(A_1 \times \dots \times A_n) &= \qty{\omega : X_1(\omega) \in A_1, \dots, X_n(\omega) \in A_n} \\
+ &= \bigcap_{i = 1}^n X_i\inv(A_i).
+ \end{align*}
+ Each $X_i$ is measurable, so $X_i\inv(A_i) \in \mathcal{F}$, and so the intersection is in $\mathcal{F}$.
+
+ (1) $\implies$ (2):
+ Consider the $\pi$-system $\mathcal A$ of rectangles $A = \prod_{i=1}^n A_i$ for $A_i \in \mathcal E_i$; as this generates $\mathcal{E}$, it suffices to check equality on it.
+
+ Since $\mu_X$ is an image measure,
+ \begin{align*}
+ \mu_X(A_1 \times \dots \times A_n) = \prob{X_1 \in A_1, \dots, X_n \in A_n} = \prob{X_1 \in A_1} \dots \prob{X_n \in A_n} &= \prod_{i=1}^n \mu_{X_i}(A_i) \\
+ &= \qty(\bigotimes_{i=1}^n \mu_{X_i})(A).
+ \end{align*}
 
- \emph{(ii) implies (iii).}
+ (2) $\implies$ (3):
 By Fubini's theorem,
 \begin{align*}
 \expect{\prod_{i=1}^n f_i(X_i)} &= \mu_X\qty(\prod_{i=1}^n f_i(x_i)) \\
- &= \int_E f(x) \dd{\mu(x)} \\
+ &= \int_E f(x) \dd{\mu_X(x)} \\
 &= \idotsint_{E_i} \qty(\prod_{i=1}^n f_i(x_i)) \dd{\mu_{X_1}(x_1)} \dots \dd{\mu_{X_n}(x_n)} \\
 &= \prod_{i=1}^n \int_{E_i} f_i(x_i) \dd{\mu_{X_i}(x_i)} \\
 &= \prod_{i=1}^n \expect{f_i(X_i)}
 \end{align*}
 
- \emph{(iii) implies (i).}
+ (3) $\implies$ (1):
 Let $f_i = 1_{A_i}$ for any $A_i \in \mathcal E_i$.
 These are bounded and measurable functions.
 Then
diff --git a/ProbAndMeasure/05_function_spaces_and_norms.tex b/ProbAndMeasure/05_function_spaces_and_norms.tex
new file mode 100644
index 0000000..b33e030
--- /dev/null
+++ b/ProbAndMeasure/05_function_spaces_and_norms.tex
@@ -0,0 +1,443 @@
+\section{$L^p$ Spaces, Norms and Inequalities}
+\subsection{Norms}
+\begin{definition}[Norm]
+ A \vocab{norm} on a real vector space $V$ is a map $\norm{\wildcard}_V \colon V \to [0, \infty)$ s.t.
+ \begin{enumerate}
+ \item $\norm{\lambda v} = \abs{\lambda} \cdot \norm{v}$;
+ \item $\norm{u + v} \leq \norm{u} + \norm{v}$;
+ \item $\norm{v} = 0 \iff v = 0$.
+ \end{enumerate}
+\end{definition}
+
+\begin{definition}
+ Let $(E, \mathcal E, \mu)$ be a measure space.
+ We write $L^p(E,\mathcal E,\mu) = L^p(\mu) = L^p$ for the space of measurable functions $f \colon E \to \mathbb R$ s.t. $\norm{f}_p$ is finite, where
+ \[ \norm{f}_p = \begin{cases}
+ \qty(\int_E \abs{f(x)}^p \dd{\mu(x)})^{\frac{1}{p}} & 1 \leq p < \infty \\
+ \esssup \abs{f} = \inf \qty{\lambda \geq 0 : \abs{f} \leq \lambda \text{ $\mu$-a.e.}} & p = \infty
+ \end{cases} \]
+\end{definition}
+
+We must check that $\norm{\wildcard}_p$ as defined is a norm. \\
+Clearly (1) holds for all $1 \leq p < \infty$ by linearity of the integral, and for $p = \infty$ it is obvious. \\
+Property (2) holds for $p = 1$ and $p = \infty$, and we will prove later that it holds for the other values of $p$ by Minkowski's inequality. \\
+The last property does not hold: $f = 0$ implies $\norm{f}_p = 0$, but $\norm{f}_p = 0$ implies only that $\abs{f}^p = 0$ a.e., so $f$ is zero a.e. on $E$.
+
+Therefore, to rigorously define the norm, we must construct the quotient space $\mathcal L^p$ of functions that coincide a.e..
+We write $[f]$ for the equivalence class of functions that are equal a.e.
+The functional $\norm{\wildcard}_p$ is then a norm on $\mathcal L^p = \qty{[f] : f \in L^p}$.
+
+\begin{proposition}[Chebyshev's inequality, Markov's inequality]
+ Let $f \colon E \to \mathbb R$ be non-negative and measurable.
+ Then $\forall \; \lambda > 0$,
+ \[ \mu(\qty{x \in E : f(x) \geq \lambda}) = \mu(f \geq \lambda) \leq \frac{\mu(f)}{\lambda} \]
+\end{proposition}
+
+\begin{proof}
+ Integrate the inequality $\lambda 1_{\qty{f \geq \lambda}} \leq f$, which holds on $E$.
+\end{proof}
+
+In particular, if $g \in L^p$, $p < \infty$ and $\lambda > 0$, then $\mu(\abs{g} \geq \lambda) = \mu(\abs{g}^p \geq \lambda^p) \leq \frac{\mu(\abs{g}^p)}{\lambda^p} < \infty$; this gives tail estimates as $\lambda \to \infty$.
+
+\begin{definition}[Convex Function]
+ Let $I \subseteq \mathbb R$ be an interval.
+ Then we say a map $c \colon I \to \mathbb R$ is \vocab{convex} if for all $x, y \in I$ and $t \in [0,1]$, we have $c(tx + (1-t)y) \leq tc(x) + (1-t)c(y)$.
+ Equivalently, for all $x, y \in I$ with $x < t < y$, we have $\frac{c(t) - c(x)}{t-x} \leq \frac{c(y) - c(t)}{y-t}$.
+\end{definition}
+
+Thus a convex function is continuous on the interior of the interval and so is Borel measurable.
+
+\begin{lemma}
+ Let $I \subseteq \mathbb R$ be an interval, let $c : I \to \mathbb{R}$, and let $m$ be in the interior of $I$.
+ If $c$ is convex on $I$, $\exists \; a, b$ s.t. $c(x) \geq ax + b \ \forall \; x \in I$, and $c(m) = am + b$.
+\end{lemma}
+
+\begin{proof}
+ Define $a = \sup \qty{\frac{c(m) - c(x)}{m - x} : x < m, x \in I}$.
+ This exists in $\mathbb R$ by the second definition of convexity.
+ Let $x, y \in I$, and $y > m > x$.
+ Then $\frac{c(m) - c(x)}{m - x} \leq a \leq \frac{c(y) - c(m)}{y - m}$, so $c(y) \geq ay - am + c(m) = ay + b$ where we define $b = c(m) - am$.
+ Similarly, for $x$, we have $c(x) \geq ax + b$.
+\end{proof}
+
+\begin{theorem}[Jensen's inequality]
+ Let $X$ be an integrable\footnote{$\expect{\abs{X}} < \infty$} r.v. taking values in an interval $I \subseteq \mathbb R$.
+ Let $c \colon I \to \mathbb R$ be a convex function.
+ Then $\expect{c(X)}$ is well-defined and
+ \begin{align*}
+ c(\expect{X}) \leq \expect{c(X)}.
+ \end{align*}
+\end{theorem}
+
+% Note that the integral $\expect{c(X)}$ is defined as $\expect{c^+(X)} - \expect{c^-(X)}$, and this is well-defined and takes values in $(-\infty, \infty]$.
+
+\begin{proof}
+ If $X$ is constant a.s., we are done.
+
+ Otherwise, $m = \mathbb{E}[X] \in \operatorname{int}\footnote{Interior of.} I$.
+
+ Using the previous lemma, $\exists \; a, b$ s.t. $c(X) \geq aX + b$.
+ In particular, $(c(X))^- \leq |a| |X| + |b|$\footnote{$f \geq g$ gives $-f \leq -g$ so $f^- \leq g^- \leq |g|$.}.
+ Hence, $\expect{c^-(X)} \leq \abs{a} \expect{\abs{X}} + \abs{b} < \infty$, and $\expect{c(X)} = \expect{c^+(X)} - \expect{c^-(X)}$ is well-defined in $(-\infty,\infty]$.
+
+ Integrating $c(X) \geq aX + b$\footnote{If $\mathbb{E}[c(X)] = \infty$, we are done.},
+ \[ \expect{c(X)} \geq a \expect{X} + b\footnote{$\expect{1} = \prob{\Omega} = 1$; this is why we need a probability measure.} = am + b = c(m) = c(\expect{X}) \]
+\end{proof}
+
+\begin{example}
+ $(\Omega, \mathcal{F}, \mathbb{P})$ and $1 \leq p \leq \infty$.
+ If $X \in L^\infty(\mathbb{P})$ then $X \in L^p(\mathbb{P})$, since $\norm{X}_p \leq \norm{X}_\infty$ as $\mathbb{P}(\Omega) = 1$ is finite.
+\end{example}
+
+\begin{example}
+ If $1 \leq p < q < \infty$, $c(x) = \abs{x}^{\frac{q}{p}}$ is a convex function.
+ If $X$ is a bounded r.v. (so lies in $L^\infty(\mathbb P)$), we then have
+ \[ \norm{X}_p = \expect{\abs{X}^p}^{\frac{1}{p}} = c(\expect{\abs{X}^p})^{\frac{1}{q}} \mathcolor{red}{\leq}\footnote{By Jensen's inequality.} \expect{c(\abs{X}^p)}^{\frac{1}{q}} = \norm{X}_q \]
+ Using the monotone convergence theorem, this extends to all $X \in L^q(\mathbb P)$ when $\norm{X}_q$ is finite.
+ So $X \in L^q \implies X \in L^p$, so $L^q(\mathbb P) \subseteq L^p(\mathbb P)$ for all $1 \leq p \leq q \leq \infty$.
+\end{example}
+
+\begin{theorem}[H\"older's inequality]
+ Let $f, g$ be measurable functions on $(E,\mathcal E,\mu)$.
+ If $p, q$ are \vocab{conjugate}, so $\frac{1}{p} + \frac{1}{q} = 1$ and $1 \leq p \leq q \leq \infty$, we have
+ \[ \mu(\abs{fg}) = \int_E \abs{f(x)g(x)} \dd{\mu} \leq \norm{f}_p \cdot \norm{g}_q \]
+\end{theorem}
+
+\begin{remark}
+ For $p = q = 2$, this is exactly the Cauchy--Schwarz inequality on $L^2$ (simpler proof on Sheet 3 by considering $\int (f+tg)^2 \dd{\mu} \geq 0$).
+\end{remark}
+
+\begin{proof}
+ The cases $p = 1$ or $p = \infty$ are obvious.
+ We can assume $f \in L^p$ and $g \in L^q$ wlog since the right hand side would otherwise be infinite.
+ We can also assume $f$ is not equal to zero a.e., otherwise this reduces to $0 \leq 0$.
+
+ Hence, $\norm{f}_p > 0$.
+ Then, we can divide both sides by $\norm{f}_p$ and so assume $\norm{f}_p = 1$.
+
+ Define a probability measure $\mathbb{P}$ on $\mathcal{E}$ by $\mathbb{P}(A) = \int_A |f|^p \dd{\mu}$ ($\mathbb{P}$ has probability density $|f|^p$ wrt $\mu$).
+ Note that for $h \geq 0$, $\int h \dd{\mathbb{P}} = \int h |f|^p \dd{\mu}$.
+
+ Then,
+ \begin{align*}
+ \mu(|fg|) &= \mu(|fg| 1_{|f| > 0}) \\
+ &= \int |f| |g| 1_{|f| > 0} \dd{\mu} \\
+ &= \int \frac{|f|^p}{|f|^{p-1}} |g| 1_{|f| > 0} \dd{\mu} \\
+ &= \int \frac{|g|}{|f|^{p-1}} 1_{|f| > 0} |f|^p \dd{\mu} \\
+ &= \int \frac{|g|}{|f|^{p-1}} 1_{|f| > 0} \dd{\mathbb{P}} \\
+ &= \mathbb{E}\qty[\frac{|g|}{|f|^{p-1}} 1_{|f| > 0}] \\
+ &\leq \mathbb{E}\qty[\qty(\frac{|g|}{|f|^{p-1}} 1_{|f| > 0})^q]^{\frac{1}{q}}\footnote{Proven earlier via Jensen's inequality: $\norm{X}_p \leq \norm{X}_q$ for $1 \leq p \leq q$; here with $p = 1$.} \\
+ &= \mathbb{E}\qty[\frac{|g|^q}{|f|^p} 1_{|f| > 0}]^{\frac{1}{q}} \\
+ &= \qty(\int \frac{|g|^q}{|f|^p} 1_{|f| > 0} \dd{\mathbb{P}})^{\frac{1}{q}} \\
+ &= \qty(\int |g|^q 1_{|f| > 0} \dd{\mu})^{\frac{1}{q}} \\
+ &\leq \qty(\int |g|^q \dd{\mu})^{\frac{1}{q}} \\
+ &= \norm{g}_q
+ \end{align*}
+\end{proof}
+
+\begin{theorem}[Minkowski's inequality]
+ Let $f, g \colon (E, \mathcal E, \mu) \to \mathbb R$ be measurable functions.
+ Then for all $1 \leq p \leq \infty$, we have $\norm{f + g}_p \leq \norm{f}_p + \norm{g}_p$.
+\end{theorem}
+
+\begin{proof}
+ The results for $p = 1, \infty$ are clear.
+ Suppose $1 < p < \infty$.
+ We can assume wlog that $f, g \in L^p$.
+
+ We can integrate the pointwise inequality $\abs{f + g}^p \leq 2^p (\abs{f}^p + \abs{g}^p)$ to deduce that $\mu(|f+g|^p) \leq 2^p \qty[\mu(|f|^p) + \mu(|g|^p)] < \infty$, so $f + g \in L^p$.
+ We assume that $0 < \norm{f+g}_p$, otherwise the result is trivial.
+ Now, using H\"older's inequality with $q$ conjugate to $p$,
+ \begin{align*}
+ \norm{f+g}_p^p = \int \abs{f + g}^p \dd{\mu} &= \int \abs{f + g}^{p-1} \abs{f + g} \dd{\mu} \\
+ &\leq \int \abs{f + g}^{p-1} \abs{f} \dd{\mu} + \int \abs{f + g}^{p-1} \abs{g} \dd{\mu} \\
+ &\leq\footnote{By H\"older's inequality.} \norm{f}_p \norm{(f+g)^{p-1}}_q + \norm{g}_p \norm{(f+g)^{p-1}}_q \\
+ &= \qty(\int \abs{f + g}^{q(p-1)} \dd{\mu})^{\frac{1}{q}} \qty(\norm{f}_p + \norm{g}_p) \\
+ &= \qty(\int \abs{f + g}^p \dd{\mu})^{\frac{1}{q}} \qty(\norm{f}_p + \norm{g}_p) \\
+ &= \norm{f+g}_p^{\frac{p}{q}} \qty(\norm{f}_p + \norm{g}_p)
+ \end{align*}
+ Dividing both sides by $\norm{f+g}_p^{\frac{p}{q}}$ and noting $q(p-1) = p$ and $p - \frac{p}{q} = 1$, we obtain $\norm{f+g}_p \leq \norm{f}_p + \norm{g}_p$.
+\end{proof}
+
+So the $L^p$ spaces are indeed normed spaces.
+
+\subsection{Banach spaces}
+
+\begin{definition}[Banach Space]
+ A \vocab{Banach space} is a complete normed vector space.
+\end{definition}
+
+\begin{theorem}[$\mathcal L^p$ is a Banach space]
+ Let $1 \leq p \leq \infty$, and let $f_n \in L^p$ be a Cauchy sequence, so $\forall \; \varepsilon > 0 \ \exists \; N$ s.t. $\forall \; m, n \geq N$, we have $\norm{f_m - f_n}_p < \varepsilon$.
+ Then $\exists \; f \in L^p$ s.t. $f_n \to f$ in $L^p$, so $\norm{f_n - f}_p \to 0$ as $n \to \infty$.
+\end{theorem}
+
+\begin{proof}
+ For this proof, we assume $p < \infty$; the other case is already proven in IB Analysis and Topology.
+
+ Since $(f_n)$ is Cauchy, using $\varepsilon = 2^{-k}$ we extract a subsequence $(f_{N_k})$ of $L^p$ functions s.t.
+ \[ S = \sum_{k=1}^\infty \norm{f_{N_{k+1}} - f_{N_k}}_p \leq \sum_{k=1}^\infty 2^{-k} < \infty \]
+ By Minkowski's inequality, for any $K$, we have
+ \[ \norm{\sum_{k=1}^K \abs{f_{N_{k+1}} - f_{N_k}}}_p \leq \sum_{k=1}^K \norm{f_{N_{k+1}} - f_{N_k}}_p \leq S < \infty. \]
+ So $\int \abs{\sum_{k=1}^K \abs{f_{N_{k+1}} - f_{N_k}}}^p \dd{\mu} \leq S^p < \infty$.
+
+ By the monotone convergence theorem applied to $\abs{\sum_{k=1}^K \abs{f_{N_{k+1}} - f_{N_k}}}^p$, which increases to $\abs{\sum_{k=1}^\infty \abs{f_{N_{k+1}} - f_{N_k}}}^p$, we find
+ \[ \norm{\sum_{k=1}^\infty \abs{f_{N_{k+1}} - f_{N_k}}}_p \leq S < \infty \]
+ Since the integral is finite, we see that $\sum_{k=1}^\infty \abs{f_{N_{k+1}} - f_{N_k}}$ is finite $\mu$-a.e..
+ Let $A$ be the set where this sum is finite, then $\mu(A^c) = 0$.
+ For any $x \in A$, $(f_{N_k}(x))$ is Cauchy (as the tail sums of the convergent series vanish), and since $\mathbb{R}$ is complete it converges.
+ Define
+ \[ f(x) = \begin{cases}
+ \lim_{k \to \infty} f_{N_k}(x) & x \in A \\
+ 0 & x \in A^c
+ \end{cases} \]
+ so $f_{N_k} \to f$ as $k \to \infty$ $\mu$-a.e. and $f$ is measurable as the limit of measurable fcns.
+
+ Now, given $\varepsilon > 0$, choose $N$ s.t. $\norm{f_n - f_m}_p < \varepsilon \ \forall \; m, n \geq N$.
+ By Fatou's lemma,
+ \begin{align*}
+ \norm{f_n - f}_p^p &= \mu(\abs{f_n - f}^p) \\
+ &= \mu(\lim_k \abs{f_n - f_{N_k}}^p) \\
+ &= \mu(\liminf_k \abs{f_n - f_{N_k}}^p) \\
+ &\leq \liminf_k \mu(\abs{f_n - f_{N_k}}^p) \\
+ &\leq \varepsilon^p \quad \forall \; n \geq N.
+ \end{align*}
+
+ Since the $f_n$ are Cauchy,
+ \[ \norm{f}_p \leq \underbrace{\norm{f - f_N}_p}_{\leq \varepsilon} + \underbrace{\norm{f_N}_p}_{< \infty} < \infty \]
+ so $f \in L^p$ and so $f_n \to f$ in $L^p$.
+\end{proof}
+
+\begin{remark}
+ If $V$ is any of the spaces
+ \[ C([0, 1]);\quad\qty{f \text{ simple}};\quad\qty{f \text{ a finite linear combination of indicators of intervals}} \]
+ then $V$ is dense in $L^p((0, 1), \mathcal{B}, \lambda)$.
+ So the completion $\overline{(V,\norm{\wildcard}_p)}$ is exactly $L^p(\lambda)$ (proof on Sheet 3: first prove it for finite linear combinations of indicators of intervals using the monotone class theorem, then approximate continuous fcns by such combinations).
+\end{remark}
+
+\subsection{Hilbert spaces}
+
+\begin{definition}[Inner Product]
+ A symmetric bilinear form $\inner{\wildcard, \wildcard} \colon V \times V \to \mathbb R$ on a real vector space $V$ is called an \vocab{inner product} if $\inner{v,v} \geq 0$ and $\inner{v,v} = 0 \iff v = 0$. \\
+ In this case, we can define a norm\footnote{Cauchy--Schwarz gives the triangle inequality.} $\norm{v} = \sqrt{\inner{v,v}}$.
+\end{definition}
+
+\begin{definition}[Hilbert Space]
+ If $(V,\inner{\wildcard,\wildcard})$ is complete, we say that it is a \vocab{Hilbert space}.
+\end{definition}
+
+\begin{corollary}
+ The space $\mathcal L^2$ is a Hilbert space for the inner product $\inner{f,g} = \int_E fg \dd{\mu}$.
+\end{corollary}
+
+\begin{example}
+ An analog of the Pythagorean theorem holds.
+ Let $f, g \in L^2$; then $\norm{f + g}_2^2 = \norm{f}_2^2 + 2\inner{f,g} + \norm{g}_2^2$.
+\end{example}
+
+\begin{example}
+ The parallelogram identity holds: $\norm{f+g}_2^2 + \norm{f-g}_2^2 = 2 \qty(\norm{f}_2^2 + \norm{g}_2^2)$
+\end{example}
+
+\begin{definition}[Orthogonal]
+ We say $f$ is \vocab{orthogonal} to $g$ if $\inner{f,g} = 0$.
+\end{definition}
+
+\begin{remark}
+ $f$ and $g$ are orthogonal iff $\norm{f + g}_2^2 = \norm{f}_2^2 + \norm{g}_2^2$.
+
+ For centred (mean zero) r.v.s $X, Y$, we have $\inner{X,Y} = \expect{XY} = \expect{(X - \expect{X})(Y - \expect{Y})} = \Cov{X,Y}$, which vanishes exactly when $X$ and $Y$ are orthogonal.
+\end{remark}
+
+\begin{definition}[Orthogonal Complement]
+ Let $V \subseteq L^2(\mu)$.
+ We define its \vocab{orthogonal complement} to be
+ \[ V^\perp = \qty{f \in L^2(\mu) : \inner{f,g} = 0 \quad \forall g \in V} \]
+\end{definition}
+
+\begin{definition}[Closed Set]
+ We say that a subset $V$ of $\mathcal L^2$ is \vocab{closed} if, whenever a sequence $f_n \in V$ converges in $\mathcal L^2$, its limit $f$ coincides a.e. with some $v \in V$.
+\end{definition}
+
+\begin{theorem}[Orthogonal Projection]
+ Let $V$ be a \underline{closed linear subspace} of $\mathcal L^2(\mu)$.
+ Then $\forall \; f \in \mathcal L^2$, $\exists$ an orthogonal decomposition $f = v + u$ where $v \in V$ and $u \in V^\perp$. \\
+ Moreover, $\norm{f - v}_2 \leq \norm{f - g}_2 \ \forall \; g \in V$ with equality iff $v = g$ a.e..
+\end{theorem}
+
+\begin{definition}[Projection]
+ We call $v$ the \vocab{projection} of $f$ onto $V$.
+\end{definition}
+
+\begin{proof}
+ In this proof, all norms are $\norm{\wildcard}_2$.
+ We define $d(f,V) = \inf_{g \in V} \norm{g - f}$, and let $g_n \in V$ be a sequence of functions s.t. $\norm{g_n - f} \to d(f,V)$. \\
+ By the parallelogram law,
+ \begin{align*}
+ 2\norm{f - g_n}^2 + 2\norm{f - g_m}^2 &= \norm{2f - (g_n + g_m)}^2 + \norm{g_n - g_m}^2 \\
+ &= 4 \norm{f - \underbrace{\frac{g_n + g_m}{2}}_{\in V}}^2 + \norm{g_n - g_m}^2 \\
+ &\geq 4 d(f,V)^2 + \norm{g_n - g_m}^2
+ \end{align*}
+ Rearranging, $\norm{g_n - g_m}^2 \leq 2\norm{f - g_n}^2 + 2\norm{f - g_m}^2 - 4d(f,V)^2 \to 0$ as $n, m \to \infty$.
+ So the sequence $g_n$ is Cauchy in $L^2$, so by completeness, it converges to some $v \in L^2$.
+ Since $V$ is closed, $v \in V$.
+ In particular, $d(f,V) = \inf_{g \in V} \norm{g - f} = \norm{v - f}$.
+
+ Note that $d(f,V)^2 \leq F(t) = \norm{f - (v+th)}^2 = d(f, V)^2 - 2t \inner{f-v, h} + t^2 \norm{h}^2$ where $t \in \mathbb R$ and $h \in V$, so $0 \leq -2t \inner{f-v, h} + t^2 \norm{h}^2$.
+ Letting $t \downarrow 0$ and $t \uparrow 0$, we obtain $\inner{f - v, h} = 0$ for all $h \in V$.
+ Defining $f - v = u$, we have $f = u + v$ and $u \in V^\perp$ since $h$ was arbitrary.
+
+ For any $g \in V$, $f - g = \underbrace{f-v}_{\in V^\perp} + \underbrace{v - g}_{\in V}$.
+ So $\norm{f - g}^2 = \norm{f - v}^2 + \norm{v - g}^2$, hence $\norm{f - g} \geq \norm{f - v}$ with equality iff $\norm{v - g} = 0$, i.e. $v = g$ a.e..
+
+ % For uniqueness, suppose $f = w + z$ with $w \in V$ and $z \in V^\perp$.
+ % Then $v - w + u - z = f - f = 0$, so taking norms, $0 = \norm{v - w + u - z}^2 = \norm{v - w}^2 + \norm{u - z}^2$ so $v = w$ and $u = z$ (a.e.) by orthogonality.
+\end{proof}
+
+\subsection{Conditional Expectation}
+
+% \begin{definition}[Sub-$\sigma$ algebra]
+
+% \end{definition}
+
+If $\mathcal{G}$ is a sub-$\sigma$-algebra of $\mathcal{F}$ (i.e. $\mathcal{G} \subseteq \mathcal{F}$), then $L^2(\Omega, \mathcal{G}, \mathbb{P})$ is a closed subspace of $L^2(\Omega, \mathcal{F}, \mathbb{P})$.
+
+\begin{definition}[Conditional Expectation]
+ For $X \in L^2(\Omega, \mathcal{F}, \mathbb{P})$, the\footnote{A variant of the} \vocab{conditional expectation of $X$ given $\mathcal{G}$}, $\mathbb{E}[X \mid \mathcal{G}]$, is defined as the orthogonal projection of $X$ onto $L^2(\Omega, \mathcal{G}, \mathbb{P})$.\\
+ In particular, $\norm{X - Y}_2 \geq \norm{X - \mathbb{E}[X \mid \mathcal{G}]}_2$ for all $\mathcal{G}$-measurable $Y \in L^2(\Omega, \mathcal{G}, \mathbb{P})$.
+\end{definition}
+
+\begin{question}
+ How does one define $\mathbb{E}[X \mid \mathcal{G}]$ if $X \in L^1(\Omega, \mathcal{F}, \mathbb{P})$? See Advanced Probability.
+\end{question}
+
+\begin{example}
+ Let $(G_i)_{i \in I}$ be a countable family of disjoint events whose union is $\Omega$ and set $\mathcal{G} = \sigma(G_i : i \in I)$.
+ Let $X$ be integrable.
+ Define, for each $i \in I$,
+ \begin{align*}
+ \mathbb{E}[X \mid G_i] = \frac{\mathbb{E}[X 1_{G_i}]}{\mathbb{P}(G_i)}
+ \end{align*}
+ (and set it to $0$ if $\mathbb{P}(G_i) = 0$).
+ Let $Y = \sum_i \mathbb{E}[X \mid G_i] 1_{G_i}$ (i.e. if $\omega \in G_i$, $Y(\omega) = \mathbb{E}[X \mid G_i]$).
+ Check that $Y$ is $\mathcal{G}$-measurable; $Y \in L^2(\Omega, \mathcal{G}, \mathbb{P})$; and that $Y$ is ``the'' orthogonal projection of $X$ onto $L^2(\Omega, \mathcal{G}, \mathbb{P})$ if $X \in L^2(\Omega, \mathcal{F}, \mathbb{P})$.
+\end{example}
+
+\subsection{$L^p$ Convergence and Uniform Integrability}
+
+For $(\Omega, \mathcal{F}, \mathbb{P})$, what are the implications between the modes of convergence: a.s., in $L^p$ for $1 \leq p < \infty$, in $\mathbb{P}$, and in distribution?
+
+Let $f_n = n 1_{(0, 1/n)}$ on $\qty((0, 1), \mathcal{B}, \lambda)$.
+$f_n \to 0$ a.s. but $\mathbb{E}\abs{f_n} = \mathbb{E}[f_n] = 1 \ \forall \; n$, so a.s. $\centernot\implies$ $L^p$ convergence.
+
+$\mathbb{P}(|X_n - X| > \epsilon) \leq \frac{\mathbb{E}|X_n - X|^p}{\epsilon^p}$ by Markov's inequality, so convergence in $L^p$ for $1 \leq p < \infty \implies$ convergence in $\mathbb{P}$.
+
+\begin{theorem}[Dominated Convergence Theorem]
+ Let $X_n$ be r.v.s on $(\Omega, \mathcal F, \mathbb P)$ s.t. $\abs{X_n} \leq Y$ for an integrable r.v. $Y$, and suppose the $X_n$ converge in $\mathbb{P}$ to $X$.
+ Then $X_n \to X$ in $L^1(\mathbb P)$, i.e. $\mathbb{E}|X_n - X| \to 0$.
+\end{theorem}
+
+\begin{question}
+ What is the ``minimum condition'' on $(X_n)$ under which $X_n \to X$ in $\mathbb{P}$ implies $X_n \to X$ in $L^1(\mathbb{P})$?
+\end{question}
+
+\begin{answer}
+ Uniform integrability.
+\end{answer}
+
+% \begin{proof}
+% We know that $X_{n_k} \to X$ almost surely along a subsequence $n_k$.
+% So $\abs{X} = \lim_k \abs{X_{n_k}} \leq C < \infty$ almost surely.
+% Then
+% \begin{align*}
+% \expect{\abs{X_n - X}} &= \expect{\abs{X_n - X} \qty(1_{\qty{\abs{X_n - X} > \frac \varepsilon 2}} + 1_{\qty{\abs{X_n - x} \leq \frac \varepsilon 2}})} \\
+% &\leq 2 C \prob{\abs{X_n - X} \geq \frac{\varepsilon}{2}} + \frac \varepsilon 2 \\
+% &< \varepsilon
+% \end{align*}
+% for sufficiently large $n$.
+% \end{proof}
+
+For $X \in L^1(\mathbb P)$, as $\delta \to 0$,
+\[ I_X(\delta) = \sup \qty{ \expect{\abs{X} 1_A} : \prob{A} \leq \delta, A \in \mathcal{F}} \to 0 \]
+If not, $\exists \; \varepsilon > 0$ and $A_n \in \mathcal F$ s.t. $\prob{A_n} \leq 2^{-n}$ but $\expect{\abs{X} 1_{A_n}} \geq \varepsilon$.
+Since $\sum_n \prob{A_n} < \infty$, by the first Borel--Cantelli lemma, we have $\prob{\bigcap_n \bigcup_{m \geq n} A_m} = 0$.
+But $\expect{\abs{X} 1_{A_n}} \leq \expect{\abs{X} 1_{\bigcup_{m \geq n} A_m}}$.
+Note that $1_{\bigcup_{m \geq n} A_m} \to 1_{\bigcap_n \bigcup_{m \geq n} A_m} = 0$ a.s., so $\expect{\abs{X} 1_{\bigcup_{m \geq n} A_m}} \to \expect{\abs{X} 1_{\bigcap_n \bigcup_{m \geq n} A_m}} = 0$ by the DCT \Lightning.
+
+\begin{definition}[Uniformly Integrable]
+ For a collection $\mathcal X \subseteq L^1(\mathbb P)$ of r.v.s, we say $\mathcal X$ is \vocab{uniformly integrable (UI)} if it is bounded in $L^1(\mathbb P)$\footnote{I.e. $\sup_{X \in \mathcal{X}} \norm{X}_1 = \sup_{X \in \mathcal{X}} \expect{\abs{X}} = I_{\mathcal{X}}(1) < \infty$.}, and
+ \[ I_{\mathcal X}(\delta) = \sup \qty{ \expect{\abs{X}1_A} : \prob{A} \leq \delta, X \in \mathcal X} \to 0 \text{ as } \delta \to 0.\]
+\end{definition}
+
+\begin{remark}
+ \begin{enumerate}
+ \item Any single integrable r.v. is UI.
+ This is also true for any finite collection of integrable r.v.s.
+ Also, for a fixed $Y \in L^1$, the collection $\mathcal{X} = \qty{X : X \text{ a r.v. s.t. } |X| \leq Y}$ is UI, as $\sup_{X \in \mathcal{X}} \mathbb{E}[|X| 1_A] \leq \mathbb{E}[Y 1_A]$ implies $I_{\mathcal{X}}(\delta) \leq I_Y(\delta) \to 0$ as $\delta \to 0$.
+ \item If $\mathcal X$ is bounded in $L^p(\mathbb P)$ for $p > 1$, then $\mathcal X$ is UI: by H\"older's inequality with $q$ conjugate to $p$,
+ \[ \expect{\abs{X}1_A} \leq \underbrace{\norm{X}_p}_{\text{bounded}} \cdot \underbrace{\prob{A}^{\frac 1 q}}_{\leq \delta^{\frac 1 q} \to 0} \]
+ \end{enumerate}
+\end{remark}
+
+% \begin{remark}
+% Note that $X_n = n1_{\qty[0,\frac{1}{n}]}$ for the Lebesgue measure $\mu$ on $[0,1]$ is bounded in $L^1(\mathbb P)$ but not uniformly integrable.
+% \end{remark}
+
+\begin{lemma}
+ $\mathcal X \subseteq L^1(\mathbb P)$ is UI $\iff \sup_{X \in \mathcal X} \expect{\abs{X} 1_{\qty{\abs{X} > K}}} \to 0$ as $K \to \infty$.
+\end{lemma}
+
+\begin{proof}
+ $(\implies)$: Applying Markov's inequality, as $K \to \infty$,
+ \[ \prob{\abs{X} > K} \leq \frac{\expect{\abs{X}}}{K} = \frac{\expect{\abs{X}1_{\Omega}}}{K} \leq \frac{I_{\mathcal X}(1)}{K} \to 0 \]
+ uniformly in $X \in \mathcal X$.
+ Applying the uniform integrability property with $A = \qty{\abs{X} > K}$, we obtain the required limit.
+
+ $(\Longleftarrow)$:
+ Let $\varepsilon > 0$ and choose $K$ s.t. $\sup_{X \in \mathcal X} \expect{\abs{X} 1_{\qty{\abs{X} > K}}} < \frac{\varepsilon}{2}$.
+ Then
+ \[ \expect{\abs{X}} = \expect{\abs{X}\qty(1_{\qty{\abs{X} \leq K}} + 1_{\qty{\abs{X} > K}})} \leq K + \frac{\varepsilon}{2} \]
+ so $\mathcal X$ is bounded in $L^1(\mathbb P)$ as required.
+ Then for $A$ s.t. $\prob{A} \leq \delta$,
+ \[ \expect{\abs{X}1_A} = \expect{\abs{X}1_A\qty(1_{\qty{\abs{X} \leq K}} + 1_{\qty{\abs{X} > K}})} \leq K\prob{A} + \expect{\abs{X}1_{\qty{\abs{X} > K}}} \leq K\delta + \frac{\varepsilon}{2} < \varepsilon \]
+ for sufficiently small $\delta$.
+\end{proof}
+
+\begin{theorem}
+ Let $X_n, X$ be r.v.s on $(\Omega, \mathcal F, \mathbb P)$.
+ Then the following are equivalent.
+ \begin{enumerate}
+ \item $X_n, X \in L^1(\mathbb P)$ and $X_n \to X$ in $L^1(\mathbb P)$.
+ \item $\qty{X_n : n \in \mathbb N}$ is uniformly integrable, and $X_n \to X$ in $\mathbb{P}$.
+ \end{enumerate}
+\end{theorem}
+
+\begin{proof}
+ (1) $\implies$ (2):
+ Using Markov's inequality,
+ \[ \prob{\abs{X_n - X} > \varepsilon} \leq \frac{\expect{\abs{X_n - X}}}{\varepsilon} \to 0 \]
+ so $X_n \to X$ in $\mathbb{P}$.
+
+ For uniform integrability, choose $N$ s.t. $\mathbb{E}|X_n - X| < \frac{\varepsilon}{2} \ \forall \; n \geq N$.
+ The finite family $\{X_1, \dots, X_{N-1}, X\}$ is UI, so choose $\delta$ s.t. $\mathbb{P}(A) < \delta$ implies $\mathbb{E}[|X| 1_A] \leq \frac{\varepsilon}{2}$ and $\mathbb{E}[|X_n|1_A] \leq \varepsilon \quad \forall \; n = 1, \dots, N-1$.
+ Then for $n \geq N$,
+ \begin{align*}
+ \expect{\abs{X_n} 1_A} \leq \expect{\abs{X_n - X} 1_A} + \expect{\abs{X} 1_A} \leq \frac{\varepsilon}{2} + \frac{\varepsilon}{2}
+ \end{align*}
+ So $\qty{X_n : n \in \mathbb N}$ is UI.
+
+ (2) $\implies$ (1):
+ $X_n \to X$ in $\mathbb{P}$, so take a subsequence $n_k$ s.t. $X_{n_k} \to X$ a.s..
+ Then,
+ \begin{align*}
+ \expect{\abs{X}} = \expect{\liminf_k \abs{X_{n_k}}} \leq\footnote{Fatou's lemma.} \liminf_k \expect{\abs{X_{n_k}}} \leq I_{\mathcal X}(1) <\footnote{As $\mathcal{X}$ is UI, hence $L^1$ bounded.} \infty,
+ \end{align*}
+ so $X \in L^1(\mathbb P)$.
+
+ Next, we define truncated r.v.s $X_n^K = \max(-K, \min(K, X_n))$ and $X^K = \max(-K, \min(K, X))$.
+ Then $X_n^K \to X^K$ in $\mathbb{P}$ (as $\mathbb{P}(|X_n^K - X^K| > \epsilon) \leq \mathbb{P}(|X_n - X| > \epsilon)$)\footnote{Aside: if $X_n \to X$ in $\mathbb{P}$ and $f$ is cts, then $f(X_n) \to f(X)$ in $\mathbb{P}$.}.
+ Also, $|X_n^K| \leq K \quad \forall \; n$, so by the BCT, $X_n^K \to X^K$ in $L^1$.
+ Now,
+ \begin{align*}
+ \expect{\abs{X_n - X}} &\leq \expect{\abs{X_n - X_n^K}} + \expect{\abs{X_n^K - X^K}} + \expect{\abs{X^K - X}} \\
+ &\leq \expect{\abs{X_n} 1_{\qty{\abs{X_n} > K}}} + \expect{\abs{X_n^K - X^K}} + \expect{\abs{X}1_{\qty{\abs{X} > K}}} \\
+ &< \varepsilon
+ \end{align*}
+ by choosing sufficiently large $K$ (by UI) and $n$.
+\end{proof}
\ No newline at end of file
diff --git a/ProbAndMeasure/06_fourier_analysis.tex b/ProbAndMeasure/06_fourier_analysis.tex
new file mode 100644
index 0000000..f16b34a
--- /dev/null
+++ b/ProbAndMeasure/06_fourier_analysis.tex
@@ -0,0 +1,403 @@
+\section{Fourier transforms}
+\subsection{Fourier transforms}
+In this section, we will write $L^p = L^p(\mathbb R^d)$ for the space of \underline{complex-valued} Borel measurable fcns on $\mathbb{R}^d$, i.e. $f \colon \mathbb R^d \to \mathbb C$ s.t. $\norm{f}_p = \qty(\int_{\mathbb R^d} \abs{f(x)}^p \dd{x})^{\frac 1p} < \infty$ for $1 \leq p < \infty$.
+
+\begin{remark}
+ For $g$ measurable s.t. $\int |g| < \infty$, define $\int g(x) \dd{x} = \int \Re(g(x)) \dd{x} + i \int \Im(g(x)) \dd{x}$.
+
+ Note that for some $\alpha \in \mathbb C$ with $\abs{\alpha} = 1$, writing $\alpha f = u + iv$ with $u, v$ real-valued,
+ \[ \abs{\int_{\mathbb R^d} f(x) \dd{x}} = \int_{\mathbb R^d} \alpha f(x) \dd{x} = \int_{\mathbb R^d} u(x) \dd{x} + i \int_{\mathbb R^d} v(x) \dd{x} \]
+ But since the left hand side is real-valued, the $i \int_{\mathbb R^d} v(x) \dd{x}$ term vanishes.
+ So
+ \[ \abs{\int_{\mathbb R^d} f(x) \dd{x}} = \int_{\mathbb R^d} u(x) \dd{x} \leq \int_{\mathbb R^d} \abs{f(x)} \dd{x} \]
+\end{remark}
+
+For $f, g \in L^2$, $\inner{f, g} = \int f(x) \overline{g(x)} \dd{\mu(x)}$ is an inner product on $L^2(\mu)$.
+
+For any $y \in \mathbb{R}^d$,
+\[ \int f(x - y) \dd{x} = \int f(x) \dd{x} = \int f(-x) \dd{x}. \]
+This is by the translation invariance and $x \mapsto -x$ symmetry of $\lambda$, proved in Sheet 3.
+Also, for $a \in \mathbb{R}$ with $a \neq 0$, $\int f(ax) \dd{x} = \frac{1}{\abs{a}^d} \int f(x) \dd{x}$.
+
+\begin{definition}[Fourier Transform]
+ Let $f \in L^1(\mathbb R^d)$.
+ We define the \vocab{Fourier transform} $\hat f$ by
+ \[ \hat f(u) = \int_{\mathbb R^d} f(x) e^{i\inner{u,x}} \dd{x} \]
+ where $u \in \mathbb{R}^d$ and $\inner{u,x} = \sum_{i=1}^d u_i x_i$.
+\end{definition}
+
+\begin{remark}
+ Note that $\abs{\hat f(u)} \leq \norm{f}_1 \quad \forall \; u \in \mathbb{R}^d$, i.e. $\hat{f} \in L^\infty$.
+
+ Also, if $u_n \to u$, then $e^{i\inner{u_n,x}} \to e^{i\inner{u,x}}$, so $f(x) e^{i\inner{u_n,x}} \to f(x) e^{i\inner{u,x}}$; $|f(x) e^{i\inner{u_n,x}}| \leq |f(x)|$ and $f \in L^1$.
+ By the DCT, $\hat f(u_n) \to \hat f(u)$.
+ Moreover, $\lim_{\norm{u} \to \infty} \hat{f}(u) = 0$ (Riemann--Lebesgue lemma, Sheet 3).
+ Thus $\hat f \in C_0(\mathbb{R}^d) = \qty{f \text{ bounded, cts and vanishing at infinity}}$.
+
+ The map $f \mapsto \hat f$ is $1-1$ but not onto: it is injective but not surjective.
+\end{remark}
+
+\begin{definition}[Fourier Transform]
+ Let $\mu$ be a finite Borel measure on $\mathbb R^d$.
+ We define the \vocab{Fourier transform} of the measure for $u \in \mathbb{R}^d$ by
+ \[ \hat\mu(u) = \int_{\mathbb R^d} e^{i\inner{u,x}} \dd{\mu(x)} \]
+\end{definition}
+
+Note that $\abs{\hat \mu(u)} \leq \mu(\mathbb R^d)$, and $\hat \mu$ is a bounded cts fcn on $\mathbb{R}^d$.
+If $\mu$ has a density $f$ (wrt $\lambda$), $\hat\mu = \int_{\mathbb R^d} e^{i\inner{u,x}} f(x) \dd{x} = \hat f$.
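+
+As a concrete illustration (an added worked example, not from the lectures), we can compute a Fourier transform explicitly and check it against the properties above.
+\begin{example}
+ Let $f(x) = e^{-\abs{x}}$ on $\mathbb R$, so $f \in L^1(\mathbb R)$ with $\norm{f}_1 = 2$. Then
+ \[ \hat f(u) = \int_{\mathbb R} e^{-\abs{x}} e^{iux} \dd{x} = \int_0^\infty e^{-x} \qty(e^{iux} + e^{-iux}) \dd{x} = \frac{1}{1 - iu} + \frac{1}{1 + iu} = \frac{2}{1 + u^2} \]
+ Indeed $\abs{\hat f(u)} \leq \norm{f}_1 = 2$, $\hat f$ is continuous, and $\hat f(u) \to 0$ as $\abs{u} \to \infty$, as the Riemann--Lebesgue lemma predicts.
+\end{example}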
+
+\begin{definition}[Characteristic Function]
+ Let $X$ be an $\mathbb R^d$-valued r.v..
+ The \vocab{characteristic function (c.f.)} $\varphi_X$ of $X$ is the Fourier transform of its law $\mu_X = \mathbb{P} \circ X\inv$.
+ So,
+ \begin{align*}
+ \varphi_X(u) = \hat \mu_X(u) = \int e^{i \inner{u, x}} \underbrace{\dd{\mu_X(x)}}_{\dd{(\mathbb{P} \circ X\inv)(x)}}\footnote{Note that $(\nu \circ f\inv)(g) = \nu(g \circ f)$.} = \int_\Omega e^{i\inner{u, X(\omega)}} \dd{\mathbb{P}(\omega)} = \expect{e^{i\inner{u,X}}}.
+ \end{align*}
+\end{definition}
+
+In particular, if $X$ has pdf $f$, then $\varphi_X(u) = \hat{f}(u)$.
+
+\begin{definition}[Fourier inversion formula]
+ Let $f \in L^1(\mathbb R^d)$ s.t. $\hat f \in L^1(\mathbb R^d)$.
+ Then we say that the \vocab{Fourier inversion formula} holds for $f$ if
+ \[ f(x) = \frac{1}{(2\pi)^d} \int_{\mathbb R^d} \hat f(u) e^{-i\inner{u,x}} \dd{u} \]
+ a.e. in $\mathbb R^d$.
+\end{definition}
+
+\begin{definition}[Plancherel identity]
+ Let $f \in L^1(\mathbb R^d) \cap L^2(\mathbb R^d)$.
+ Then the \vocab{Plancherel identity} holds for $f$ if
+ \[ \norm{\hat f}_2 = (2\pi)^{\frac d2} \norm{f}_2 \]
+\end{definition}
+
+We will show that the Fourier inversion formula holds whenever $\hat f \in L^1(\mathbb R^d)$, and the Plancherel identity holds for all $f \in L^1(\mathbb R^d) \cap L^2(\mathbb R^d)$.
+
+\begin{remark}
+ Given the Plancherel identity, the normalised Fourier transform $f \mapsto (2\pi)^{-\frac d2} \hat f$ extends to a linear isometry of $L^2(\mathbb R^d)$, by approximating any function in $L^2(\mathbb R^d)$ by integrable functions.
+\end{remark}
+
+\subsection{Convolutions}
+\begin{definition}
+ Let $f \in L^1(\mathbb R^d)$ and $\nu$ be a probability measure on $\mathbb R^d$.
+ We define their \emph{convolution} $f \ast \nu$ by
+ \[ (f \ast \nu)(x) = \begin{cases}
+ \int_{\mathbb R^d} f(x-y) \dd{\nu(y)} & \text{if } (y \mapsto f(x-y)) \in L^1(\nu) \\
+ 0 & \text{else}
+ \end{cases} \]
+\end{definition}
+
+\begin{remark}
+ If $1 \leq p < \infty$, by Jensen's inequality (as $\nu$ is a probability measure),
+ \begin{align*}
+ \int_{\mathbb R^d} \qty( \int_{\mathbb R^d} \abs{f(x-y)} \dd{\nu(y)} )^p \dd{x} &\leq \int_{\mathbb R^d} \int_{\mathbb R^d} \abs{f(x-y)}^p \dd{\nu(y)} \dd{x} \\
+ &= \int_{\mathbb R^d} \int_{\mathbb R^d} \abs{f(x-y)}^p \dd{x} \dd{\nu(y)} \\
+ &= \int_{\mathbb R^d} \int_{\mathbb R^d} \abs{f(x)}^p \dd{x} \dd{\nu(y)} \\
+ &= \int_{\mathbb R^d} \abs{f(x)}^p \dd{x} \\
+ &= \norm{f}_p^p
+ \end{align*}
+ So for $f \in L^p(\mathbb R^d)$, we have $(y \mapsto f(x-y)) \in L^p(\nu)$ for almost every $x$, and again by Jensen's inequality,
+ \[ \norm{f \ast \nu}_p^p = \int_{\mathbb R^d} \abs{ \int_{\mathbb R^d} f(x-y)\dd{\nu(y)} }^p \dd{x} \leq \int_{\mathbb R^d} \qty( \int_{\mathbb R^d} \abs{f(x-y)} \dd{\nu(y)} )^p \dd{x} \leq \norm{f}_p^p \]
+ Hence $f \mapsto f \ast \nu$ is a contraction on $L^p(\mathbb R^d)$.
+\end{remark}
+
+In the case where $\nu$ has a density $g$ with respect to the Lebesgue measure, we write $f \ast g = f \ast \nu$.
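+
+To make the convolution concrete, here is a small added example (not from the lectures) in $d = 1$.
+\begin{example}
+ Let $\nu$ be the uniform distribution on $[0,1]$, with density $g = 1_{[0,1]}$, and take $f = 1_{[0,1]} \in L^1(\mathbb R)$. Then
+ \[ (f \ast g)(x) = \int_{\mathbb R} 1_{[0,1]}(x-y) 1_{[0,1]}(y) \dd{y} = \lambda\qty([0,1] \cap [x-1,x]) = \begin{cases} x & 0 \leq x \leq 1 \\ 2 - x & 1 \leq x \leq 2 \\ 0 & \text{else} \end{cases} \]
+ which is the triangular density: convolving with $g$ smooths the indicator.
+ Here $\norm{f \ast g}_1 = 1 = \norm{f}_1$, consistent with the contraction property above.
+\end{example}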
+
+\begin{definition}
+ For probability measures $\mu, \nu$ on $\mathbb R^d$, their convolution $\mu \ast \nu$ is a probability measure on $\mathbb R^d$ given by the law of $X + Y$ where $X, Y$ are independent r.v.s with laws $\mu$ and $\nu$, so
+ \begin{align*}
+ (\mu \ast \nu)(A) &= \prob{X+Y \in A} \\
+ &= \int_{\mathbb R^d \times \mathbb R^d} 1_A(x+y) \dd{(\mu \otimes \nu)(x, y)} \\
+ &= \int_{\mathbb R^d} \int_{\mathbb R^d} 1_A(x+y) \dd{\nu(y)} \dd{\mu(x)}
+ \end{align*}
+\end{definition}
+
+If $\mu$ has density $f$ with respect to the Lebesgue measure, $\mu \ast \nu$ has density $f \ast \nu$ with respect to the Lebesgue measure.
+Indeed,
+
+\begin{align*}
+ (\mu \ast \nu)(A) &= \int_{\mathbb R^d} \int_{\mathbb R^d} 1_A(x+y) f(x) \dd{x} \dd{\nu(y)} \\
+ &= \int_{\mathbb R^d} \int_{\mathbb R^d} 1_A(v) f(v-y) \dd{v} \dd{\nu(y)} \\
+ &= \int_{\mathbb R^d} 1_A(v) \int_{\mathbb R^d}f(v-y) \dd{\nu(y)} \dd{v} \\
+ &= \int_{\mathbb R^d} 1_A(v) (f \ast \nu)(v) \dd{v}
+\end{align*}
+
+\begin{proposition}
+ $\widehat{f \ast \nu}(u) = \hat f(u) \hat \nu(u)$.
+\end{proposition}
+
+\begin{proposition}
+ $\widehat{\mu \ast \nu}(u) = \expect{e^{i\inner{u,X+Y}}} = \expect{e^{i\inner{u,X}}e^{i\inner{u,Y}}} = \hat \mu(u) \hat \nu(u)$.
+\end{proposition}
+
+\subsection{Fourier transforms of Gaussians}
+\begin{definition}
+ The \emph{normal distribution} $N(0,t)$ is given by the probability density function
+ \[ g_t(x) = \frac{1}{\sqrt{2\pi t}} e^{-\frac{x^2}{2t}} \]
+\end{definition}
+If $\varphi_X$ is the characteristic function of a standard normal r.v., then differentiating under the integral sign (justified by the DCT) and integrating by parts,
+\begin{align*}
+ \dv{u} \varphi_X(u) &= \dv{u} \int_{\mathbb R} e^{iux} g_1(x) \dd{x} \\
+ &= \int_{\mathbb R} g_1(x) \dv{u} e^{iux} \dd{x} \\
+ &= \frac{i}{\sqrt{2\pi}} \int_{\mathbb R} \underbrace{e^{iux}}_{v} \underbrace{x e^{-\frac{x^2}{2}}}_{w'} \dd{x} \\
+ &= \frac{i^2}{\sqrt{2\pi}} \int_{\mathbb R} u e^{iux} e^{-\frac{x^2}{2}} \dd{x} \\
+ &= -u \varphi_X(u)
+\end{align*}
+Hence,
+\[ \dv{u}\qty(e^{\frac{u^2}{2}} \varphi_X(u)) = ue^{\frac{u^2}{2}} \varphi_X(u) - e^{\frac{u^2}{2}} u \varphi_X(u) = 0 \]
+In particular, $\varphi_X(u) = \varphi_X(0) e^{-\frac{u^2}{2}} = e^{-\frac{u^2}{2}}$.
+In other words, $\hat g_1(u) = \sqrt{2\pi} g_1(u)$.
+
+In $\mathbb R^d$, consider a Gaussian random vector $Z = (Z_1, \dots, Z_d)$ with independent and identically distributed entries $Z_i \sim N(0,1)$.
+Then, the joint probability density function of $\sqrt{t}Z$ is
+\[ g_t(x) = \prod_{j=1}^d \frac{1}{\sqrt{2\pi t}} e^{-\frac{x_j^2}{2t}} = (2\pi t)^{-\frac{d}{2}} e^{-\frac{\norm{x}^2}{2t}} \]
+The Fourier transform of $g_t$ is
+\[ \hat g_t(u) = \expect{e^{i\inner{u,\sqrt{t}Z}}} = \expect{\prod_{j=1}^d e^{iu_j \sqrt{t} Z_j}} = \prod_{j=1}^d \expect{e^{iu_j \sqrt{t} Z_j}} = \prod_{j=1}^d e^{-u_j^2 \frac{t}{2}} = e^{-\frac{\norm{u}^2 t}{2}} \]
+which implies that in general, $\hat g_t(u) = (2\pi)^{\frac{d}{2}} t^{-\frac{d}{2}} g_{\frac{1}{t}}(u)$.
+Taking the Fourier transform with respect to $u$, $\hhat g_t = (2\pi)^d g_t$, and since $g_t(-x) = g_t(x)$ and the Lebesgue measure is translation invariant, we have
+\[ g_t(x) = \frac{1}{(2\pi)^d} \hhat g_t(x) = \frac{1}{(2\pi)^d} \int_{\mathbb R^d} e^{-i\inner{u,x}} \hat g_t(u) \dd{u} \]
+so the Fourier inversion theorem holds for the Gaussian densities $g_t$.
+\begin{definition}
+ We say that a function on $\mathbb R^d$ is a \emph{Gaussian convolution} if it is of the form
+ \[ f \ast g_t(x) = \int_{\mathbb R^d} f(x-y) g_t(y) \dd{y} \]
+ where $x \in \mathbb R^d, t > 0, f \in L^1(\mathbb R^d)$.
+\end{definition}
+We can show that $f \ast g_t$ is continuous on $\mathbb R^d$, and $\norm{f \ast g_t}_1 \leq \norm{f}_1$.
+Note that $\widehat{f \ast g_t}(u) = \hat f(u) e^{-\frac{\norm{u}^2 t}{2}}$, so $\norm{\widehat{f \ast g_t}}_\infty \leq \norm{f}_1$, giving $\norm{\widehat{f \ast g_t}}_1 \leq \norm{f}_1 (2\pi)^{\frac{d}{2}} t^{-\frac{d}{2}} < \infty$.
+\begin{lemma}
+ The Fourier inversion theorem holds for all Gaussian convolutions.
+\end{lemma}
+\begin{proof}
+ We can use the Fourier inversion theorem for $g_t(y)$ to see that
+ \begin{align*}
+ (2\pi)^d f \ast g_t(x) &= (2\pi)^d \int_{\mathbb R^d} f(x-y) g_t(y) \dd{y} \\
+ &= \int_{\mathbb R^d} f(x-y) \int_{\mathbb R^d} e^{-i\inner{u,y}} \hat g_t(u) \dd{u} \dd{y} \\
+ &= \int_{\mathbb R^d} e^{-i\inner{u,x}} \int_{\mathbb R^d} f(x-y) e^{i\inner{u,x-y}} \dd{y} \hat g_t(u) \dd{u} \\
+ &= \int_{\mathbb R^d} e^{-i\inner{u,x}} \int_{\mathbb R^d} f(z) e^{i\inner{u,z}} \dd{z} \hat g_t(u) \dd{u} \\
+ &= \int_{\mathbb R^d} e^{-i\inner{u,x}} \hat f(u) \hat g_t(u) \dd{u} \\
+ &= \int_{\mathbb R^d} e^{-i\inner{u,x}} \widehat{f \ast g_t}(u) \dd{u}
+ \end{align*}
+\end{proof}
+\begin{remark}
+ If $\mu$ is a finite measure, then $\mu \ast g_t = \mu \ast g_{\frac{t}{2}} \ast g_{\frac{t}{2}}$ with $\mu \ast g_{\frac{t}{2}} \in L^1$, so it is also a Gaussian convolution.
+\end{remark}
+\begin{lemma}[Gaussian convolutions are dense in $L^p$]
+ Let $f \in L^p$ where $1 \leq p < \infty$.
+ Then $\norm{f \ast g_t - f}_p \to 0$ as $t \to 0$.
+\end{lemma}
+\begin{proof}
+ One can easily show that the space $C_c(\mathbb R^d)$ of continuous functions of compact support is dense in $L^p$.
+ Hence, for all $\varepsilon > 0$, there exists $h \in C_c(\mathbb R^d)$ s.t. $\norm{f - h}_p < \frac{\varepsilon}{3}$, and by properties of the convolution, we also obtain
+ \[ \norm{f \ast g_t - h \ast g_t}_p = \norm{(f - h) \ast g_t}_p \leq \norm{f - h}_p < \frac{\varepsilon}{3} \]
+ So
+ \[ \norm{f \ast g_t - f}_p \leq \norm{f \ast g_t - h \ast g_t}_p + \norm{h \ast g_t - h}_p + \norm{h - f}_p < \frac{2\varepsilon}{3} + \norm{h \ast g_t - h}_p \]
+ so it suffices to prove the result for $f = h \in C_c(\mathbb R^d)$.
+ We define a new map
+ \[ e(y) = \int_{\mathbb R^d} \abs{h(x-y) - h(x)}^p \dd{x} \]
+ Since $h$ is bounded on its bounded support, the dominated convergence theorem implies that $e$ is continuous at $y = 0$.
+ Note that $e(y) \leq 2^{p+1} \norm{h}_p^p$.
+ Hence, by Jensen's inequality,
+ \begin{align*}
+ \norm{h \ast g_t - h}_p^p &= \int_{\mathbb R^d} \abs{ \int_{\mathbb R^d} (h(x-y) - h(x)) g_t(y) \dd{y} }^p \dd{x} \\
+ &\leq \int_{\mathbb R^d} \int_{\mathbb R^d} \abs{h(x-y) - h(x)}^p \dd{x} g_t(y) \dd{y} \\
+ &= \int_{\mathbb R^d} e(y) g_t(y) \dd{y} \\
+ &= \int_{\mathbb R^d} \underbrace{e(\sqrt{t} z)}_{\to e(0) = 0 \text{ as } t \to 0} g_1(z) \dd{z} \\
+ &\to 0
+ \end{align*}
+ by dominated convergence, since $e$ is bounded.
+\end{proof}
+\begin{theorem}[Fourier inversion]
+ Let $f \in L^1(\mathbb R^d)$ be s.t. $\hat f \in L^1(\mathbb R^d)$.
+ Then for almost all $x \in \mathbb R^d$,
+ \[ f(x) = \frac{1}{(2\pi)^d} \int_{\mathbb R^d} e^{-i\inner{u,x}} \hat f(u) \dd{u} \]
+\end{theorem}
+\begin{remark}
+ This proves that the Fourier transform is injective; $\hat f = \hat g$ implies $\widehat{f - g} = 0$, so by Fourier inversion, $f = g$ almost everywhere.
+ Since the right hand side is continuous in $x$, the identity holds everywhere on $\mathbb R^d$ for the (unique) continuous representative of $f$ in its equivalence class.
+\end{remark}
+\begin{proof}
+ The Fourier inversion theorem holds for the following Gaussian convolution for all $t$:
+ \[ f \ast g_t(x) = \frac{1}{(2\pi)^d} \int_{\mathbb R^d} e^{-i\inner{u,x}} \hat f(u) e^{\frac{-\abs{u}^2 t}{2}} \dd{u} = f_t(x) \]
+ Now, since Gaussian convolutions are dense, $f \ast g_t \to f$ in $L^1$, so $f \ast g_t \to f$ in measure by Markov's inequality.
+ Hence, along a subsequence, $f \ast g_{t_k} \to f$ almost everywhere.
+ On the other hand, by the dominated convergence theorem with dominating function $\abs{\hat f}$, the right hand side converges to $\frac{1}{(2\pi)^d} \int_{\mathbb R^d} e^{-i\inner{u,x}} \hat f(u) \dd{u}$.
+ So this is equal to $\lim_{t_k \to 0} f_{t_k}$ almost everywhere by uniqueness of limits.
+\end{proof}
+\begin{theorem}[Plancherel]
+ Let $f \in L^1(\mathbb R^d) \cap L^2(\mathbb R^d)$.
+ Then $\norm{f}_2 = (2\pi)^{-\frac{d}{2}} \norm{\hat f}_2$.
+\end{theorem}
+\begin{remark}
+ By the polarisation identity, $\inner{f, g} = (2\pi)^{-d} \inner{\hat f, \hat g}$.
+\end{remark}
+\begin{proof}
+ Initially, we assume $\hat f \in L^1$.
+ In this case, $f, \hat f \in L^\infty$, and $(x,u) \mapsto f(x)\hat f(u)$ is integrable for the product Lebesgue measure $\dd{x} \otimes \dd{u}$ on $\mathbb R^d \times \mathbb R^d$, so Fubini's theorem applies.
+ \begin{align*}
+ (2\pi)^d \norm{f}_2^2 &= (2\pi)^d \int_{\mathbb R^d} f(x) \overline{f(x)} \dd{x} \\
+ &= \int_{\mathbb R^d} \qty(\int_{\mathbb R^d} e^{-i\inner{u,x}} \hat f(u) \dd{u}) \overline{f(x)} \dd{x} \\
+ &= \int_{\mathbb R^d} \hat f(u) \overline{\int_{\mathbb R^d} e^{i\inner{u,x}} f(x) \dd{x}} \dd{u} \\
+ &= \int_{\mathbb R^d} \hat f(u) \overline{\hat f(u)} \dd{u} \\
+ &= \norm{\hat f}_2^2
+ \end{align*}
+ To extend this result to general $f$, we take the Gaussian convolutions $f \ast g_t = f_t$ s.t. $f_t \to f$ in $L^2$.
+ By the continuity of the norm, $\norm{f_t}_2 \to \norm{f}_2$.
+ Since $\abs{\hat f(u) e^{-\frac{\abs{u}^2 t}{2}}}^2$ increases to $\abs{\hat f(u)}^2$ as $t \downarrow 0$, we have by monotone convergence that $\norm{\hat f_t}_2^2 \uparrow \norm{\hat f}_2^2$.
+ Therefore, since the Plancherel identity holds for the $f_t$,
+ \[ \norm{f}_2^2 = \lim_{t \to 0} \norm{f_t}_2^2 = \lim_{t \to 0} (2\pi)^{-d} \norm{\hat f_t}_2^2 = (2\pi)^{-d} \norm{\hat f}_2^2 \]
+\end{proof}
+\begin{remark}
+ Since $L^1 \cap L^2$ is dense in $L^2$, we can extend the linear operator $F_0(f) = (2\pi)^{-\frac{d}{2}} \hat f$ to $L^2$ by continuity to a linear isometry $F \colon L^2 \to L^2$ known as the \emph{Fourier--Plancherel transform}.
+ One can show that $F$ is surjective with inverse $F^{-1} \colon L^2 \to L^2$.
+\end{remark}
+\begin{example}
+ Consider the Dirac measure $\delta_0$ on $\mathbb R$, so $\hat \delta_0(u) = \int_{\mathbb R} e^{iux} \dd{\delta_0(x)} = 1$.
+ But $1 \notin L^1(\mathbb R)$, so the inverse Fourier transform $\frac{1}{2\pi} \int_{\mathbb R} e^{-iux} \dd{u}$ is not defined as a Lebesgue integral.
+\end{example}
+\begin{theorem}
+ Let $X$ be a random vector in $\mathbb R^d$ with law $\mu_X$.
+ Then the characteristic function $\varphi_X = \hat \mu_X$ uniquely determines $\mu_X$.
+ In addition, if $\varphi_X \in L^1$, then $\mu_X$ has a probability density function $f_X$ which can be computed almost everywhere by $\frac{1}{(2\pi)^d} \int_{\mathbb R^d} e^{-i\inner{u,x}} \varphi_X(u) \dd{u}$.
+\end{theorem}
+\begin{proof}
+ Let $Z = (Z_1, \dots, Z_d)$ be a vector of independent and identically distributed r.v.s, independent of $X$, with $Z_j \sim N(0,1)$.
+ Then $\sqrt{t} Z$ has probability density function $g_t$.
+ Then $X + \sqrt{t} Z$ has probability density function $f_t = \mu_X \ast g_t$.
+ This is a Gaussian convolution since $\mu_X \ast g_t = \mu_X \ast g_{\frac t 2} \ast g_{\frac t 2}$.
+ Hence,
+ \[ f_t(x) = \frac{1}{(2\pi)^d} \int_{\mathbb R^d} e^{-i\inner{u,x}} \varphi_X(u) e^{-\frac{\abs{u}^2 t}{2}} \dd{u} \]
+ which is uniquely determined by $\varphi_X$.
+ We show on an example sheet that two Borel probability measures $\mu, \nu$ on $\mathbb R^d$ coincide if and only if $\mu(g) = \nu(g)$ for all $g \colon \mathbb R^d \to \mathbb R$ that are bounded, continuous, and of compact support.
+ Now,
+ \[ \int_{\mathbb R^d} g(x) f_t(x) \dd{x} = \expect{\underbrace{g(X + \sqrt{t} Z)}_{\to g(X) \text{ a.s. as } t \to 0}} \]
+ Since $\abs{g(X + \sqrt{t}Z)} \leq \norm{g}_\infty < \infty$, by the bounded convergence theorem, this converges to $\expect{g(X)} = \int_{\mathbb R^d} g(x) \dd{\mu_X(x)}$.
+ So by uniqueness of limits, $\varphi_X$ determines $\mu_X$.
+
+ If $\varphi_X \in L^1$, by dominated convergence, $f_t(x)$ converges everywhere as $t \to 0$ to $f_X(x) = \frac{1}{(2\pi)^d} \int_{\mathbb R^d} e^{-i\inner{u,x}} \varphi_X(u) \dd{u}$.
+ In particular, since $\mu_X \ast g_t \geq 0$, the limit $f_X$ is also non-negative on $\mathbb R^d$.
+ Then, for any bounded continuous function of compact support $g \in C^b_c(\mathbb R^d)$,
+ \[ \int_{\mathbb R^d} g(x) f_X(x) \dd{x} = \int_{\mathbb R^d} g(x) \lim_{t \to 0} \underbrace{f_t(x)}_{\abs{f_t} \leq (2\pi)^{-d} \norm{\varphi_X}_1} \dd{x} = \lim_{t \to 0} \int_{\mathbb R^d} g(x) f_t(x) \dd{x} = \int_{\mathbb R^d} g(x) \dd{\mu_X(x)} \]
+ by the dominated convergence theorem, since $g$ has compact support.
+ So $\mu_X$ has density $f_X$.
+\end{proof}
+\begin{definition}
+ A sequence $(\mu_n)_{n \in \mathbb N}$ of Borel probability measures on $\mathbb R^d$ \emph{converges weakly} to a Borel probability measure $\mu$ if $\mu_n(g) \to \mu(g)$ for all $g \colon \mathbb R^d \to \mathbb R$ bounded and continuous.
+ If $(X_n)_{n \in \mathbb N}, X$ are random vectors with laws $(\mu_{X_n}), \mu_X$ s.t. $\mu_{X_n}$ converges weakly to $\mu_X$, we say $(X_n)$ converges weakly to $X$.
+\end{definition}
+\begin{remark}
+ If $d = 1$, weak convergence is equivalent to convergence in distribution; this is proven on an example sheet.
+ One can also show that convergence of $\mu_n(g)$ to $\mu(g)$ for all $g \in C_c^\infty(\mathbb R^d)$ suffices to show weak convergence, where $C_c^\infty(\mathbb R^d)$ is the space of smooth functions of compact support.
+ This is equivalent to the notion of weak-${}^\star$ convergence on the function space $C_b(\mathbb R^d)$.
+\end{remark}
+\begin{theorem}[L\'evy's continuity theorem]
+ Let $X_n, X$ be random vectors in $\mathbb R^d$, s.t. $\varphi_{X_n}(u) \to \varphi_X(u)$ for all $u$, as $n \to \infty$.
+ Then $\mu_{X_n} \to \mu_X$ weakly.
+\end{theorem}
+\begin{remark}
+ The converse holds by definition of weak convergence, testing against the complex exponentials in the Fourier transform.
+\end{remark}
+\begin{proof}
+ Let $Z = (Z_1, \dots, Z_d)$ be a vector of standard normal r.v.s, independent from each other, $X_n$, and $X$.
+ Let $g \in C_c^\infty(\mathbb R^d)$.
+ Then $g \in L^1(\mathbb R^d)$, and is Lipschitz by the mean value theorem, as its first derivative is bounded.
+ Write $\abs{g(x) - g(y)} \leq \norm{g}_{\mathrm{Lip}} \abs{x - y}$.
+ Let $\varepsilon > 0$.
+ Let $t > 0$ be sufficiently small s.t. $\sqrt t \norm{g}_{\mathrm{Lip}} \expect{\abs{Z}} < \frac{\varepsilon}{3}$.
+ Then,
+ \begin{align*}
+ \abs{\mu_{X_n}(g) - \mu_X(g)} &= \abs{\expect{g(X_n)} - \expect{g(X)}} \\
+ &\leq \expect{\abs{g(X_n) - g(X_n + \sqrt t Z)}} + \expect{\abs{g(X) - g(X + \sqrt t Z)}} \\
+ &+ \abs{\expect{g(X_n + \sqrt t Z) - g(X + \sqrt t Z)}} \\
+ &\leq 2\norm{g}_{\mathrm{Lip}}\sqrt t \expect{\abs{Z}} + \abs{\expect{g(X_n + \sqrt t Z) - g(X + \sqrt t Z)}} \\
+ &\leq \frac{2\varepsilon}{3} + \abs{\expect{g(X_n + \sqrt t Z) - g(X + \sqrt t Z)}}
+ \end{align*}
+ We show that the remaining term can be made less than $\frac{\varepsilon}{3}$ for sufficiently large $n$.
+ Let $f_{t,n}(x) = g_t \ast \mu_{X_n}$.
+ Then, by Fourier inversion for Gaussian convolutions,
+ \begin{align*}
+ \expect{g(X_n + \sqrt t Z)} &= \int_{\mathbb R^d} g(x) f_{t,n}(x) \dd{x} \\
+ &= \frac{1}{(2\pi)^d} \int_{\mathbb R^d} g(x) \int_{\mathbb R^d} e^{-i\inner{u,x}} \varphi_{X_n}(u) e^{-\frac{\abs{u}^2 t}{2}} \dd{u} \dd{x}
+ \end{align*}
+ Since characteristic functions are bounded by 1, we can apply the dominated convergence theorem with dominating function $\abs{g(x)} e^{-\frac{\abs{u}^2 t}{2}}$ to find
+ \begin{align*}
+ \expect{g(X_n + \sqrt t Z)} &\to \frac{1}{(2\pi)^d} \int_{\mathbb R^d} g(x) \int_{\mathbb R^d} e^{-i\inner{u,x}} \varphi_X(u) e^{-\frac{\abs{u}^2 t}{2}} \dd{u} \dd{x} \\
+ &= \int_{\mathbb R^d} g(x) f_t(x) \dd{x} \\
+ &= \expect{g(X + \sqrt t Z)}
+ \end{align*}
+ where $f_t = g_t \ast \mu_X$.
+ So as $n \to \infty$, the difference between these two terms can be made less than $\frac{\varepsilon}{3}$ as required.
+\end{proof}
+\begin{theorem}[Central limit theorem]
+ Let $X_1, X_2, \dots$ be independent and identically distributed r.v.s with $\expect{X_i} = 0$ and $\Var{X_i} = 1$.
+ Let $S_n = \sum_{i=1}^n X_i$.
+ Then
+ \[ \frac{1}{\sqrt{n}} S_n \xrightarrow{\text{weakly}} Z \sim N(0,1) \]
+ In particular,
+ \[ \prob{\frac{1}{\sqrt{n}} S_n \leq x} \to \prob{Z \leq x} \]
+\end{theorem}
+\begin{proof}
+ Let $X = X_1$.
+ The characteristic function $\varphi(u) = \varphi_X(u) = \expect{e^{iuX}}$ satisfies $\varphi(0) = 1$, $\varphi'(u) = i \expect{X e^{iuX}}$, $\varphi''(u) = i^2 \expect{X^2 e^{iuX}}$.
+ We can find $\varphi'(0) = i\expect{X} = 0$ and $\varphi''(0) = -\expect{X^2} = -\Var X = -1$.
+ By Taylor's theorem, $\varphi(v) = 1 - \frac{v^2}{2} + o(v^2)$ as $v \to 0$.
+ Now, denoting $\varphi_n(u) = \varphi_{\frac{1}{\sqrt n} S_n}(u)$, we can write
+ \begin{align*}
+ \varphi_n(u) &= \expect{e^{i\frac{u}{\sqrt n} (X_1 + \dots + X_n)}} \\
+ &= \prod_{j=1}^n \expect{e^{i\frac{u}{\sqrt n} X_j}} \\
+ &= \qty[\varphi\qty(\frac{u}{\sqrt n})]^n \\
+ &= \qty[1 - \frac{u^2}{2n} + o\qty(\frac{1}{n})]^n
+ \end{align*}
+ The complex logarithm satisfies $\log(1 + z) = z + o(z)$, so by taking logarithms, we find
+ \[ \log \varphi_n(u) = n \log\qty(1 - \frac{u^2}{2n} + o\qty(\frac{1}{n})) = -\frac{u^2}{2} + o(1) \to -\frac{u^2}{2} \]
+ Hence, $\varphi_n(u) \to e^{-\frac{\abs{u}^2}{2}} = \varphi_Z(u)$.
+ So by L\'evy's continuity theorem, the result follows.
+\end{proof}
+\begin{remark}
+ This theorem extends to $\mathbb R^d$ by using the next proposition, using the fact that $X_n \to X$ weakly in $\mathbb R^d$ if and only if $\inner{X_n, v} \to \inner{X, v}$ weakly in $\mathbb R$ for all $v \in \mathbb R^d$.
+\end{remark}
+\begin{definition}
+ A r.v. $X$ in $\mathbb R^d$ is called a \emph{Gaussian vector} if $\inner{X, v}$ is Gaussian for each $v \in \mathbb R^d$.
+\end{definition}
+\begin{proposition}
+ Let $X$ be a Gaussian vector in $\mathbb R^d$.
+ Then $Z = AX + b$ is a Gaussian vector in $\mathbb R^m$ where $A$ is an $m \times d$ matrix and $b \in \mathbb R^m$.
+ Also, $X \in L^2$, and $\mu = \expect{X}$ and $V = (\Cov{X_i, X_j})_{i,j}$ exist and determine $\mu_X$.
+ The characteristic function is
+ \[ \varphi_X(u) = e^{i\inner{\mu,u} - \frac{\inner{u,Vu}}{2}} \]
+ If $V$ is invertible, then $\mu_X$ has a probability density function
+ \[ f_X(x) = (2\pi)^{-\frac{d}{2}} (\det V)^{-\frac{1}{2}} \exp{-\frac{1}{2}\inner{x-\mu, V^{-1}(x - \mu)}} \]
+ Subvectors $X_{(1)}, X_{(2)}$ of $X$ are independent if and only if $\Cov{X_{(1)}, X_{(2)}} = 0$.
+\end{proposition}
+\begin{proposition}
+ Let $X_n \to X$ weakly in $\mathbb R^d$ as $n \to \infty$.
+ Then,
+ \begin{enumerate}
+ \item if $h \colon \mathbb R^d \to \mathbb R^k$ is continuous, then $h(X_n) \to h(X)$ weakly;
+ \item if $\abs{X_n - Y_n} \to 0$ in probability, then $Y_n \to X$ weakly;
+ \item if $Y_n \to c$ in probability where $c$ is constant on $\Omega$, then $(X_n, Y_n) \to (X, c)$ weakly in $\mathbb R^d \times \mathbb R^d$.
+ \end{enumerate}
+\end{proposition}
+\begin{remark}
+ Combining parts (iii) and (i), $X_n + Y_n \to X + c$ weakly if $Y_n \to c$ in probability.
+ If $d = 1$, then in addition $X_n Y_n \to c X$ weakly.
+\end{remark}
+\begin{proof}
+ \emph{Part (i).}
+ This follows from the fact that $g \circ h$ is bounded and continuous for any bounded continuous test function $g$.
+
+ \emph{Part (ii).}
+ Let $g \colon \mathbb R^d \to \mathbb R$ be bounded and Lipschitz continuous (testing against bounded Lipschitz functions suffices for weak convergence).
+ Then
+ \[ \abs{\expect{g(Y_n)} - \expect{g(X)}} \leq \underbrace{\abs{\expect{g(X_n)} - \expect{g(X)}}}_{< \frac{\varepsilon}{3}} + \expect{\abs{g(X_n) - g(Y_n)}} \]
+ where the bound on $\expect{g(X_n)} - \expect{g(X)}$ holds for sufficiently large $n$.
+ Then the remaining term is upper bounded by
+ \[ \expect{\abs{g(X_n) - g(Y_n)} \qty(1_{\qty{\abs{X_n - Y_n} \leq \frac{\varepsilon}{3\norm{g}_{\mathrm{Lip}}}}} + 1_{\qty{\abs{X_n - Y_n} > \frac{\varepsilon}{3\norm{g}_{\mathrm{Lip}}}}} )} \]
+ \[ \leq \norm{g}_{\mathrm{Lip}} \frac{\varepsilon}{3\norm{g}_{\mathrm{Lip}}} + 2\norm{g}_\infty \prob{\abs{X_n - Y_n} > \frac{\varepsilon}{3\norm{g}_{\mathrm{Lip}}}} < \frac{2\varepsilon}{3} \]
+ for sufficiently large $n$.
+
+ \emph{Part (iii).}
+ $\abs{(X_n, c) - (X_n, Y_n)} = \abs{Y_n - c} \to 0$ in probability.
+ Also, $\expect{g(X_n, c)} \to \expect{g(X, c)}$ for all bounded continuous maps $g \colon \mathbb R^d \times \mathbb R^d \to \mathbb R$, so $(X_n, c) \to (X, c)$ weakly.
+ Hence, by (ii), $(X_n, Y_n) \to (X, c)$ weakly.
+\end{proof} diff --git a/ProbAndMeasure/probmeasure.pdf b/ProbAndMeasure/probmeasure.pdf index 352c35d..34f37f1 100644 Binary files a/ProbAndMeasure/probmeasure.pdf and b/ProbAndMeasure/probmeasure.pdf differ diff --git a/ProbAndMeasure/probmeasure.tex b/ProbAndMeasure/probmeasure.tex index 5a70899..bdf3d50 100644 --- a/ProbAndMeasure/probmeasure.tex +++ b/ProbAndMeasure/probmeasure.tex @@ -15,7 +15,11 @@ \newcommand{\expect}[1]{\mathbb{E}\left[{#1}\right]} % \DeclarePairedDelimiter\ceil{\lceil}{\rceil} \DeclarePairedDelimiter\floor{\lfloor}{\rfloor} +% \renewcommand{\norm}[1]{\left \lVert #1 \right \rVert} +\newcommand{\wildcard}{{}\cdot{}} +\DeclareMathOperator*{\esssup}{ess\ sup} % \DeclarePairedDelimiter\Brackets{[\![}{]\!]} +\newcommand{\hhat}[1]{\hat{\hat{#1}}} % \includeonly{02_measurable_functions.tex} @@ -63,4 +67,6 @@ \include{02_measurable_functions.tex} \include{03_integration.tex} \include{04_product_measures.tex} + \include{05_function_spaces_and_norms.tex} + \include{06_fourier_analysis.tex} \end{document} \ No newline at end of file diff --git a/preamble.tex b/preamble.tex index 65d5c8f..8cf54b4 100644 --- a/preamble.tex +++ b/preamble.tex @@ -261,7 +261,7 @@ \newtheorem{remark}{Remark} \newtheorem*{note}{Note} -% Fancy Section and Chapter Heads +% Fancy Section and Chapter Heads %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \makeatletter @@ -324,7 +324,7 @@ \usepackage{cancel} \newcommand\hcancel[2][black]{\setbox0=\hbox{$#2$}% -\rlap{\raisebox{.45\ht0}{\textcolor{#1}{\rule{\wd0}{1pt}}}}#2} +\rlap{\raisebox{.45\ht0}{\textcolor{#1}{\rule{\wd0}{1pt}}}}#2} \newcommand\Ccancel[2][black]{ \let\OldcancelColor\CancelColor