diff --git a/CodingAndCryptography/01_noiseless_coding.tex b/CodingAndCryptography/01_noiseless_coding.tex index 92cc6a8..8df5971 100644 --- a/CodingAndCryptography/01_noiseless_coding.tex +++ b/CodingAndCryptography/01_noiseless_coding.tex @@ -258,7 +258,7 @@ \subsection{Optimal codes} \subsection{Huffman coding} Let $\mathcal A = \qty{\mu_1, \dots, \mu_m}$ and $p_i = \prob{X = \mu_i}$. We assume $a = 2$ and $\mathcal B = \qty{0,1}$ for simplicity. -Without loss of generality, we can assume $p_1 \geq p_2 \geq \dots \geq p_m$. +WLOG, we can assume $p_1 \geq p_2 \geq \dots \geq p_m$. We construct an optimal code inductively. If $m = 2$, we take codewords $0$ and $1$. @@ -356,8 +356,8 @@ \subsection{Huffman coding} \begin{align*} \expect{S_m} = \expect{S_{m-1}} + p_{m-1} + p_m \end{align*} - Let $c_m'$ be an optimal code for $X_m$, which without loss of generality can be chosen to be prefix-free. - Without loss of generality, the last two codewords of $c_m'$ can be chosen to have the largest possible length and differ only in the final position, by the previous lemma. + Let $c_m'$ be an optimal code for $X_m$, which wlog can be chosen to be prefix-free. + WLOG, the last two codewords of $c_m'$ can be chosen to have the largest possible length and differ only in the final position, by the previous lemma. Then, $c_m'(\mu_{m-1}) = y 0$ and $c_m'(\mu_m) = y 1$ for some $y \in \qty{0,1}^\star$. Let $c_{m-1}'$ be the prefix-free code for $X_{m-1}$ given by \begin{align*} diff --git a/CodingAndCryptography/03_information_theory.tex b/CodingAndCryptography/03_information_theory.tex index 310caa3..c348c8c 100644 --- a/CodingAndCryptography/03_information_theory.tex +++ b/CodingAndCryptography/03_information_theory.tex @@ -246,7 +246,7 @@ \subsection{Capacity} We show that this value converges to zero as $n \to \infty$ using the next lemma. \end{proof} -\begin{lemma} +\begin{lemma} \label{lem:9.5} Let $\varepsilon > 0$. A binary symmetric channel with error probability $p$ is used to transmit $n$ digits. Then, @@ -382,7 +382,7 @@ \subsection{Shannon's second coding theorem} The maximum is taken over all discrete r.v.s $X$ taking values in $\mathcal A$, or equivalently. This maximum is attained since $I$ is continuous and the space \begin{align*} - \qty{(p_1, \dots, p_m) \in \mathbb R^m \midd p_i \geq 0, \sum_{i=1}^m p_i = 1} + \qty{(p_1, \dots, p_m) \in \mathbb R^m : p_i \geq 0, \sum_{i=1}^m p_i = 1} \end{align*} is compact. The information capacity depends only on the channel matrix. @@ -498,9 +498,9 @@ \subsection{Shannon's second coding theorem} \end{remark} \begin{proof} - We use the method of random coding. - Without loss of generality let $p < \frac{1}{2}$. - Let $\varepsilon > 0$ s.t. $p + \varepsilon < \frac{1}{2}$ and $R < 1 - H(p + \varepsilon)$. + We use the `method of random coding'. + WLOG let $p < \frac{1}{2}$. + Let $\varepsilon > 0$ s.t. $p + \varepsilon < \frac{1}{2}$ and $R < 1 - H(p + \varepsilon)$ as $H$ cts. We use minimum distance decoding, and in the case of a tie, we make an arbitrary choice. Let $m = \floor*{2^{nR}}$, and let $C = \qty{c_1, \dots, c_m}$ be a code chosen uniformly at random from $\mathcal C = \qty{[n,m]\text{-codes}}$, a set of size $\binom{2^n}{m}$. @@ -518,7 +518,7 @@ \subsection{Shannon's second coding theorem} We consider the two cases separately. In the first case with $d(c_i,Y) > r$, $\prob{d(c_i,Y) > r}$ is the probability that the channel makes more than $r$ errors, and hence more than $n(p + \varepsilon)$ errors. 
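Concretely, one way to see this bound (it is essentially \cref{lem:9.5} again, sketched here for convenience): the number of errors in $n$ uses of the channel is a sum of $n$ independent Bernoulli($p$) indicators, with mean $np$ and variance $np(1-p)$, so Chebyshev's inequality gives
\begin{align*}
\prob{\text{more than } n(p + \varepsilon) \text{ errors}} \leq \frac{np(1-p)}{(n\varepsilon)^2} = \frac{p(1-p)}{n\varepsilon^2},
\end{align*}
which tends to zero as $n \to \infty$.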
- We have already shown that this converges to zero as $n \to \infty$. + We have already shown that this converges to zero as $n \to \infty$ in \cref{lem:9.5}. In the second case with $d(c_i,Y) \leq r$, if $j \neq i$, \begin{align*} @@ -543,16 +543,40 @@ \subsection{Shannon's second coding theorem} \begin{proof} Let $R'$ be s.t. $R < R' < 1 - H(p)$. Then, apply the previous result to $R'$ to construct a sequence of codes $(C_n')_{n \geq 1}$ of length $n$ and size $\floor*{2^{nR'}}$, where $e(C_n') \to 0$. - Order the codewords of $C_n'$ by the probability of error given that the codeword was sent, and delete the worst half. + Order the codewords of $C_n'$ by $\mathbb{P}(\text{error} \mid c \text{ sent})$ and delete the worst half. This gives a code $C_n$ with $\hat e(C_n) \leq 2 e(C_n')$. Hence $\hat e(C_n) \to 0$ as $n \to \infty$. + Since $C_n$ has length $n$, and size $\frac{1}{2} \floor*{2^{nR'}} = \floor*{2^{nR' - 1}}$. But $2^{nR' - 1} = 2^{n(R' - \frac{1}{n})} \geq 2^{nR}$ for sufficiently large $n$. So we can replace $C_n'$ with a code of smaller size $\floor*{2^{nR}}$ and still have $\hat e(C_n) \to 0$ and $\rho(C_n) \to R$ as $n \to \infty$. \end{proof} -Therefore, a binary symmetric channel with error probability $p$ has operational capacity $1 - H(p)$, as we can transmit reliably at any rate $R < 1 - H(p)$, and the capacity is at most $1 - H(p)$. -The result shows that codes with certain properties exist, but does not give a way to construct them. +\begin{remark} + \begin{enumerate} + \item A BSC with error prob $p$ has operational capacity $1 - H(p)$, as we can transmit reliably at any rate $R < 1 - H(p)$. + \item This result shows us that good codes exists, but the proof does not tell us how to construct them + \end{enumerate} +\end{remark} + +\begin{example} + Suppose capacity is $0.8$. + Let us have a message string of $0$s and $1$s. + Take $R = 0.75$ ($< 0.8$). + For $n$ large, $\exists$ set of $2^{0.75n}$ codewords of length $n$ that have error prob below some prescribed threshold. \\ + To encode message stream from the source, we: + \begin{itemize} + \item Break it into blocks of size $3 \ceil{\frac{n}{4}} = m$ sufficiently large ($\geq \frac{3}{4} n_0(\epsilon)$) + \item encode these $m$-blocks into $C_n$ using codewords of length $\frac{4}{3} m$ for each $m$-block + \item transmit new message through channel. + \end{itemize} + You then get + \begin{itemize} + \item marked \underline{reduction} in error prob but + \item at the cost of \underline{complexity} of encoding and \underline{slower} rate of transmission. + \end{itemize} + % \underline{we don't yet know the code}! +\end{example} \subsection{The Kelly criterion} Let $0 < p < 1$, $u > 0$, $0 \leq w < 1$. diff --git a/CodingAndCryptography/04_algebraic_coding_theory.tex b/CodingAndCryptography/04_algebraic_coding_theory.tex index 1697d0a..e9b2cfb 100644 --- a/CodingAndCryptography/04_algebraic_coding_theory.tex +++ b/CodingAndCryptography/04_algebraic_coding_theory.tex @@ -1,105 +1,135 @@ \section{Algebraic coding theory} \subsection{Linear codes} -\begin{definition} + +\begin{definition}[Linear Code] A binary code $C \subseteq \mathbb F_2^n$ is \vocab{linear} if $0 \in C$, and whenever $x, y \in C$, we have $x + y \in C$. \end{definition} + Equivalently, $C$ is a vector subspace of $\mathbb F_2^n$. -\begin{definition} + +\begin{definition}[Rank] The \vocab{rank} of a linear code $C$, denoted $\rank C$, is its dimension as an $\mathbb F_2$-vector space. 
A linear code of length $n$ and rank $k$ is called an $(n,k)$-code. If it has minimum distance $d$, it is called an $(n,k,d)$-code. \end{definition} + Let $v_1, \dots, v_k$ be a basis for $C$. -Then $C = \qty{\sum_{i=1}^k \lambda_i v_i \mid \lambda_i \in \mathbb F_2}$. +Then $C = \qty{\sum_{i=1}^k \lambda_i v_i : \lambda_i \in \mathbb F_2}$. The size of the code is therefore $2^k$, so an $(n,k)$-code is an $[n,2^k]$-code, and an $(n,k,d)$-code is an $[n,2^k,d]$-code. The information rate is $\frac{k}{n}$. -\begin{definition} + +\begin{definition}[Weight] The \vocab{weight} of $x \in \mathbb F_2^n$ is $w(x) = d(x,0)$. \end{definition} + \begin{lemma} The minimum distance of a linear code is the minimum weight of a nonzero codeword. \end{lemma} + \begin{proof} Let $x, y \in C$. Then, $d(x,y) = d(x+y,0) = w(x+y)$. Observe that $x \neq y$ iff $x + y \neq 0$, so $d(C)$ is the minimum $w(x+y)$ for $x + y \neq 0$. \end{proof} -\begin{definition} + +\begin{definition}[Inner Product] Let $x, y \in \mathbb F_2^n$. Define $x \cdot y = \sum_{i=1}^n x_i y_i \in \mathbb F_2$. This is symmetric and bilinear. \end{definition} -There are nonzero $x$ s.t. $x \cdot x = 0$. -\begin{definition} + +\begin{warning} + There are nonzero $x$ s.t. $x \cdot x = 0$. +\end{warning} + +\begin{definition}[Parity Check Code] Let $P \subseteq \mathbb F_2^n$. The \vocab{parity check code} defined by $P$ is \begin{align*} - C = \qty{x \in \mathbb F_2^n \mid \forall p \in P,\,p \cdot x = 0} + C = \qty{x \in \mathbb F_2^n : \forall p \in P,\,p \cdot x = 0} \end{align*} \end{definition} -\begin{example} + +\begin{example} ~\vspace*{-1.5\baselineskip} \begin{enumerate} \item $P = \qty{11\dots 1}$ gives the simple parity check code. \item $P = \qty{1010101, 0110011, 0001111}$ gives Hamming's original $[7,16,3]$-code. \item $C^+$ and $C^-$ are linear if $C$ is linear. \end{enumerate} \end{example} + \begin{lemma} Every parity check code is linear. \end{lemma} + \begin{proof} $0 \in C$ as $p \cdot 0 = 0$. If $p \cdot x = 0$ and $p \cdot y = 0$ then $p \cdot (x + y) = 0$, so $x, y \in C$ implies $x + y \in C$. \end{proof} -\begin{definition} + +\begin{definition}[Dual Code] Let $C \subseteq \mathbb F_2^n$ be a linear code. The \vocab{dual code} $C^\perp$ is defined by \begin{align*} - C^\perp = \qty{x \in \mathbb F_2^n \mid \forall y \in C,\, x \cdot y = 0} + C^\perp = \qty{x \in \mathbb F_2^n : \forall y \in C,\, x \cdot y = 0} \end{align*} \end{definition} + By definition, $C^\perp$ is a parity check code, and hence is linear. Note that $C \cap C^\perp$ may contain elements other than 0. + \begin{lemma} $\rank C + \rank C^\perp = n$. \end{lemma} + \begin{proof} One can prove this by defining $C^\perp$ as an annihilator from linear algebra. A proof using coding theory is shown later. \end{proof} + \begin{corollary} Let $C$ be a linear code. Then $(C^\perp)^\perp = C$. In particular, all linear codes are parity check codes, defined by $C^\perp$. \end{corollary} + \begin{proof} If $x \in C$, then $x \cdot y = 0$ for all $y \in C^\perp$ by definition, so $x \in (C^\perp)^\perp$. Then $\rank C = n - \rank C^\perp = n - (n - \rank (C^\perp)^\perp) = \rank (C^\perp)^\perp$, so $C = (C^\perp)^\perp$. \end{proof} -\begin{definition} + +\begin{definition}[Generator Matrix] Let $C$ be an $(n,k)$-code. A \vocab{generator matrix} $G$ for $C$ is a $k \times n$ matrix where the rows form a basis for $C$. A \vocab{parity check matrix} $H$ for $C$ is a generator matrix for the dual code $C^\perp$, so it is an $(n-k) \times n$ matrix. 
\end{definition} -The codewords of a linear code can be viewed either as linear combinations of rows of $G$, or linear dependence relations between the columns of $H$, so $C = \qty{x \in \mathbb F_2^n \mid H x = 0}$. -\begin{definition} + +The codewords of a linear code can be viewed either as linear combinations of rows of $G$, or linear dependence relations between the columns of $H$, so $C = \qty{x \in \mathbb F_2^n : H x = 0}$. + +\subsection{Syndrome decoding} +\begin{definition}[Syndrome] Let $C$ be an $(n, k)$-code. The \vocab{syndrome} of $x \in \mathbb F_2^n$ is $Hx$. \end{definition} + If we receive a word $x = c + z$ where $c \in C$ and $z$ is the error pattern, $Hx = Hz$ as $Hc = 0$. If $C$ is $e$-error correcting, we precompute $Hz$ for all $z$ for which $w(z) \leq e$. On receiving $x$, we can compute the syndrome $Hx$ and find this entry in the table of values of $Hz$. If successful, we decode $c = x - z$, with $d(x,c) = w(z) \leq e$. -\begin{definition} + +\begin{definition}[Equivalent] Codes $C_1, C_2 \subseteq \mathbb F_2^n$ are \vocab{equivalent} if there exists a permutation of bits that maps codewords in $C_1$ to codewords in $C_2$. \end{definition} + Codes are typically only considered up to equivalence. + \begin{lemma} Every $(n, k)$-linear code is equivalent to one with generator matrix with block form $\begin{pmatrix} I_k & B \end{pmatrix}$ for some $k \times (n - k)$ matrix $B$. \end{lemma} + \begin{proof} Let $G$ be a $k \times n$ generator matrix for $C$. Using Gaussian elimination, we can transform $G$ into row echelon form @@ -110,7 +140,7 @@ \subsection{Linear codes} \end{cases} \end{align*} for some $\ell(1) < \ell(2) < \dots < \ell(k)$. - Permuting the columns replaces $C$ with an equivalent code, so without loss of generality we may assume $\ell(i) = i$. + Permuting the columns replaces $C$ with an equivalent code, so wlog we may assume $\ell(i) = i$. Hence, \begin{align*} G = \begin{pmatrix} @@ -121,14 +151,21 @@ \subsection{Linear codes} \end{align*} Further row operations eliminate $\star$ to give $G$ in the required form. \end{proof} + A message $y \in \mathbb F_2^k$ viewed as a row vector can be encoded as $yG$. If $G = \begin{pmatrix} I_k & B \end{pmatrix}$, then $yG = (y, yB)$ where $y$ is the message and $yB$ is a string of check digits. + +\begin{definition}[Systematic Code] + A \vocab{systematic code} is any code whose codewords can be split up in this manner. +\end{definition} + We now prove the following lemma that was stated earlier. \begin{lemma} $\rank C + \rank C^\perp = n$. \end{lemma} + \begin{proof} Let $C$ have generator matrix $G = \begin{pmatrix} I_k & B @@ -137,19 +174,21 @@ \subsection{Linear codes} Its kernel is $C^\perp$. By the rank-nullity theorem, $\dim \mathbb F_2^n = \dim \ker \gamma + \dim \Im \gamma$, so $n = \rank C + \rank C^\perp$ as required. \end{proof} + \begin{lemma} An $(n, k)$-code with generator matrix $G = \begin{pmatrix} I_k & B \end{pmatrix}$ has parity check matrix $H$ of the form $\begin{pmatrix} - B^\transpose & I_{n-k} + -B^\transpose & I_{n-k} \end{pmatrix}$. \end{lemma} + \begin{proof} \begin{align*} GH^\transpose = \begin{pmatrix} I_k & B \end{pmatrix} \begin{pmatrix} - B \\ + -B \\ I_{n-k} \end{pmatrix} = B + B = 2B = 0 \end{align*} @@ -157,6 +196,7 @@ \subsection{Linear codes} But $\rank H = n - k$, and $\rank C^\perp = n - k$. So $H = C^\perp$, and $C^\perp$ has generator matrix $H$. \end{proof} + \begin{lemma} Let $C$ be a linear code with parity check matrix $H$. 
Then, $d(C) = d$ iff @@ -165,18 +205,22 @@ \subsection{Linear codes} \item a set of $d$ columns of $H$ are linearly dependent. \end{enumerate} \end{lemma} + The proof is left as an exercise. % see online for proof \subsection{Hamming codes} + \begin{definition} Let $d \geq 1$, and let $n = 2^d - 1$. Let $H$ be the $d \times n$ matrix with columns given by the nonzero elements of $\mathbb F_2^d$. The \vocab{Hamming $(n, n-d)$-linear code} is the code with parity check matrix $H$. \end{definition} + \begin{lemma} The Hamming $(n, n-d)$-code $C$ has minimum distance $d(C) = 3$, and is a perfect 1-error correcting code. \end{lemma} + \begin{proof} Any two columns of $H$ are linearly independent, but there are three linearly dependent columns. Hence, $d(C) = 3$. @@ -196,11 +240,14 @@ \subsection{Reed--Muller codes} Let $X = \mathbb F_2^d$, so $n = 2^d - \abs{X}$. Let $v_0 = (1, \dots, 1)$, and let $v_i = 1_{H_i}$ where $H_i = \qty{p \in X \mid p_i = 0}$ is a coordinate hyperplane. + \begin{definition} Let $0 \leq r \leq d$. The \vocab{Reed--Muller code} $RM(d,r)$ of \vocab{order} $r$ and length $2^d$ is the linear code spanned by $v_0$ and all wedge products of at most $r$ of the the $v_i$ for $1 \leq i \leq d$. \end{definition} + By convention, the empty wedge product is $v_0$. + \begin{example} Let $d = 3$, and let $X = \mathbb F_2^3 = \qty{p_1, \dots, p_8}$ in binary order. \begin{align*} @@ -218,16 +265,19 @@ \subsection{Reed--Muller codes} \end{align*} A generator matrix for Hamming's original code is a submatrix in the top-right corner. \end{example} + $RM(3,0)$ is spanned by $v_0$, and is hence the repetition code of length 8. $RM(3,1)$ is spanned by $v_0, v_1, v_2, v_3$, which is equivalent to a parity check extension of Hamming's original $(7,4)$-code. $RM(3,2)$ is an $(8,7)$-code, and can be shown to be equivalent to a simple parity check code of length 8. $RM(3,3)$ is the trivial code $\mathbb F_2^8$ of length 8. + \begin{theorem} \begin{enumerate} \item The vectors $v_{i_1} \wedge \dots \wedge v_{i_s}$ for $i_1 < \dots < i_s$ and $0 \leq s \leq d$ form a basis for $\mathbb F_2^n$. \item The rank of $RM(d,r)$ is $\sum_{s=0}^r \binom{d}{s}$. \end{enumerate} \end{theorem} + \begin{proof} \vocab{Part (i).} There are $\sum_{s=0}^d \binom{d}{s} = 2^d = n$ vectors listed, so it suffices to show they are a spanning set, or equivalently $RM(d,d)$ is the trivial code. @@ -240,17 +290,21 @@ \subsection{Reed--Muller codes} $RM(d,r)$ is spanned by $v_{i_1} \wedge \dots \wedge v_{i_s}$ where $i_1 < \dots < i_s$ and $0 \leq s \leq r$. Since these are linearly independent, the rank of $RM(d,r)$ is the number of such vectors, which is $\sum_{s=0}^d \binom{d}{s}$. \end{proof} + \begin{definition} Let $C_1, C_2$ be linear codes of length $n$ where $C_2 \subseteq C_1$. The \vocab{bar product} is $C_1 \mid C_2 = \qty{(x \mid x + y) \mid x \in C_1, y \in C_2}$. \end{definition} + This is a linear code of length $2n$. + \begin{lemma} \begin{enumerate} \item $\rank (C_1 \mid C_2) = \rank C_1 + \rank C_2$. \item $d(C_1 \mid C_2) = \min \qty{2d(C_1), d(C_2)}$. \end{enumerate} \end{lemma} + \begin{proof} \vocab{Part (i).} If $C_1$ has basis $x_1, \dots, x_k$ and $C_2$ has basis $y_1, \dots, y_\ell$, then $C_1 \mid C_2$ has basis @@ -266,12 +320,14 @@ \subsection{Reed--Muller codes} There is a nonzero $x \in C_1$ with $w(x) = d(C_1)$, so $d(C_1 \mid C_2) \leq w(x \mid x) = 2d(C_1)$. 
There is a nonzero $y \in C_2$ with $w(y) = d(C_2)$, giving $d(C_1 \mid C_2) \leq w(0 \mid 0 + y) = d(C_2)$, giving the other inequality as required. \end{proof} + \begin{theorem} \begin{enumerate} \item $RM(d,r) = RM(d-1,r) \mid RM(d-1,r-1)$ for $0 < r < d$. \item $RM(d,r)$ has minimum distance $2^{d-r}$ for all $r$. \end{enumerate} \end{theorem} + \begin{proof} \vocab{Part (i).} Exercise. @@ -292,13 +348,16 @@ \subsection{Cyclic codes} % Note that $X^2 + X \in \mathbb F_2[X]$ is nonzero, but always evaluates to zero. % If $F$ is a field, $F[X]$ is a Euclidean domain using the degree function as the Euclidean function, and has a Euclidean division algorithm. If $F$ is a field and $f \in F[X]$, $\faktor{F[X]}{(f)}$ is in bijection with $F^n$ where $n = \deg f$, since $\faktor{F[X]}{(f)}$ is represented by the set of functions of degree less than $\deg f$. + \begin{definition} A linear code $C \subseteq \mathbb F_2^n$ is \vocab{cyclic} if \begin{align*} (a_0, a_1, \dots, a_{n-1}) \in C \implies (a_{n-1}, a_0, \dots, a_{n-2}) \in C \end{align*} \end{definition} + We identify $\faktor{\mathbb F_2[X]}{(X^n - 1)}$ with $\mathbb F_2^n$, letting $\pi(a_0 + a_1X + \dots + a_{n-1}X^{n-1}) = (a_0, a_1, \dots, a_{n-1})$. + \begin{lemma} A code $C \subseteq \mathbb F_2^n$ is cyclic iff $\pi(\mathcal C) = C$ satisfies \begin{enumerate} @@ -307,16 +366,20 @@ \subsection{Cyclic codes} \item $f \in \mathbb F_2[X], g \in \mathcal C$ implies $fg \in \mathcal C$. \end{enumerate} \end{lemma} + Equivalently, $\mathcal C$ is an ideal of $\faktor{\mathbb F_2[X]}{(X^n - 1)}$. + \begin{proof} If $g(X) = a_0 + a_1X + \dots + a_{n-1}X^{n-1}$, multiplication by $X$ gives $Xg(X) = a_{n-1} + a_0X + \dots + a_{n-2}X^{n-1}$. So $\mathcal C$ is cyclic iff (i) and (ii) hold and $g(X) \in C$ implies $Xg(X) \in C$. Linearity then gives (iii). \end{proof} + We will identify $C$ with $\mathcal C$. The cyclic codes of length $n$ correspond to ideals in $\faktor{\mathbb F_2[X]}{(X^n - 1)}$. Such ideals correspond to ideals of $\mathbb F_2[X]$ that contain $X^n - 1$. Since $\mathbb F_2[X]$ is a principal ideal domain, these ideals correspond to polynomials $g(X) \in \mathbb F_2[X]$ dividing $X^n - 1$. + \begin{theorem} Let $C \trianglelefteq \faktor{\mathbb F_2[X]}{(X^n - 1)}$ be a cyclic code. Then, there exists a unique \vocab{generator} polynomial $g(X) \in \mathbb F_2[X]$ s.t. @@ -326,6 +389,7 @@ \subsection{Cyclic codes} \end{enumerate} In particular, $p(X) \in \mathbb F_2[X]$ represents a codeword iff $g \mid p$. \end{theorem} + \begin{proof} Let $g(X) \in \mathbb F_2[X]$ be the polynomial of smallest degree that represents a nonzero codeword of $C$. Note that $\deg g < n$. @@ -342,14 +406,17 @@ \subsection{Cyclic codes} Then $g_1 \mid g_2$ and $g_2 \mid g_1$. So $g_1 = cg_2$ where $c \in \mathbb F_2^\star$, so $c = 1$. \end{proof} + \begin{lemma} Let $C$ be a cyclic code of length $n$ with generator $g(X) = a_0 + a_1 X + \dots + a_k X^k$ with $a_k \neq 0$. Then $C$ has basis $\qty{g, Xg, X^2g, \dots, X^{n-k-1}g}$. In particular, $\rank C = n - k$. \end{lemma} + \begin{proof} Exercise. \end{proof} + \begin{corollary} Let $C$ be a cyclic code of length $n$ with generator $g(X) = a_0 + a_1 X + \dots + a_k X^k$ with $a_k \neq 0$. Then, a generator matrix for $C$ is given by @@ -363,10 +430,12 @@ \subsection{Cyclic codes} \end{align*} This is an $(n - k) \times n$ matrix. \end{corollary} + \begin{definition} Let $g$ be a generator for $C$. The \vocab{parity check polynomial} is the polynomial $h$ s.t. $g(X) h(X) = X^n - 1$. 
\end{definition} + \begin{corollary} Writing $h(X) = b_0 + b_1 X + \dots + b_{n-k} X^{n-k}$, the parity check matrix is \begin{align*} @@ -379,20 +448,24 @@ \subsection{Cyclic codes} \end{align*} which is a $k \times n$ matrix. \end{corollary} + \begin{proof} One can check that the inner product of the $i$th row of the generator matrix and the $j$th row of the parity check matrix is the coefficient of $X^{n-k-i+j}$ in $g(X) h(X) = X^n - 1$. Since $1 \leq i \leq n - k$ and $1 \leq j \leq k$, $0 < n - k - i + j < n$, and such coefficients are zero. Hence, the rows of $G$ are orthogonal to the rows of $H$. Note that as $b_{n-k} \neq 0$, $\rank H = k = \rank C^\perp$, so $H$ is the parity check matrix. \end{proof} + \begin{remark} Given a polynomial $f(X) = \sum_{i=0}^m f_i X_i$ of degree $m$, the \vocab{reverse} polynomial is $\check{f}(X) = f_n + f_{n-1}X + \dots + f_0 X^M = X^m f\qty(\frac{1}{X})$. The cyclic code generated by $\check{h}$ is the dual code $C^\perp$. \end{remark} + \begin{lemma} If $n$ is odd, $X^n - 1 = f_1(X) \dots f_t(X)$ where the $f_i(X)$ are distinct irreducible polynomials in $\mathbb F_2[X]$. Thus, there are $2^t$ cyclic codes of length $n$. \end{lemma} + This is false if $n$ is even, for instance, $X^2 - 1 = (X - 1)^2$. The proof follows from Galois theory. @@ -410,21 +483,27 @@ \subsection{BCH codes} Let $K = \mathbb F_{2^r}$, and define $\bm \mu_n(K) = \qty{x \in K \mid x^n = 1} \leq K^\times$, which is a cyclic group. Since $n \mid (2^r - 1) = \abs{K^\times}$, $\bm \mu_n(K)$ is the cyclic group of order $n$. Hence, $\bm \mu_n(K) = \qty{1, \alpha, \alpha^2, \dots, \alpha^{n-1}}$ for some primitive $n$th root of unity $\alpha \in K$. + \begin{definition} The cyclic code of length $n$ with \vocab{defining set} $A \subseteq \bm\mu_n(K)$ is the code \begin{align*} C = \qty{f(X) \in \faktor{\mathbb F_2[X]}{(X^n - 1)} \midd \forall a \in A,\, f(a) = 0} \end{align*} \end{definition} + The generator polynomial $g(X)$ is the nonzero polynomial of least degree s.t. $g(a) = 0$ for all $a \in A$. Equivalently, $g$ is the least common multiple of the minimal polynomials of the elements of $A$. + \begin{definition} The cyclic code of length $n$ with defining set $\qty{\alpha, \alpha^2, \dots, \alpha^{\delta - 1}}$ is a \vocab{BCH code} with \vocab{design distance} $\delta$. \end{definition} + \begin{theorem} A BCH code $C$ with design distance $\delta$ has minimum distance $d(C) \geq \delta$. \end{theorem} + This proof needs the following result. + \begin{lemma} The Vandermonde matrix satisfies \begin{align*} @@ -437,6 +516,7 @@ \subsection{BCH codes} \end{pmatrix} = \prod_{1 \leq j < i \leq n} (x_i - x_j) \end{align*} \end{lemma} + \begin{proof}[Proof of theorem] Consider \begin{align*} @@ -452,6 +532,7 @@ \subsection{BCH codes} But any codeword of $C$ is a dependence relation between the columns of $H$. Hence every nonzero codeword has weight at least $\delta$, giving $d(C) \geq \delta$. \end{proof} + Note that $H$ in the proof above is not a parity check matrix, as its entries do not lie in $\mathbb F_2$. Let $C$ be a cyclic code with defining set $\qty{\alpha, \alpha^2, \dots, \alpha^{\delta - 1}}$ where $\alpha \in K$ is a primitive $n$th root of unity. @@ -459,6 +540,7 @@ \subsection{BCH codes} Suppose we send $c \in C$ through the channel, and receive $r = c + e$ where $e$ is the error pattern with at most $t$ nonzero errors. 
Note that $r, c, e$ correspond to polynomials $r(X), c(X), e(X)$, and $c(\alpha^j) = 0$ for $j \in \qty{1, \dots, \delta - 1}$ as $c$ is a codeword. Hence, $r(\alpha^j) = e(\alpha^j)$. + \begin{definition} The \vocab{error locator polynomial} of an error pattern $e \in \mathbb F_2^n$ is \begin{align*} @@ -466,7 +548,9 @@ \subsection{BCH codes} \end{align*} where $\mathcal E = \qty{i \mid e_i = 1}$. \end{definition} + Assuming that $\deg \sigma = \abs{\mathcal E}$, where $2t + 1 \leq \delta$, we must recover $\sigma$ from $r(X)$. + \begin{theorem} Suppose $\deg \sigma = \abs{\mathcal E} \leq t$ where $2t + 1 \leq \delta$. Then $\sigma(X)$ is the unique polynomial in $K[X]$ of least degree s.t. @@ -475,6 +559,7 @@ \subsection{BCH codes} \item $\sigma(X) \sum_{j=1}^{2t} r(\alpha^j) X^j = \omega(X)$ mod $X^{2t+1}$ for some $\omega \in K[X]$ of degree at most $t$. \end{enumerate} \end{theorem} + \begin{proof} Define $\omega(X) = -X\sigma'(X)$, called the \vocab{error co-locator}. Hence, @@ -494,7 +579,7 @@ \subsection{BCH codes} This verifies (i) and (ii) for this choice of $\omega$, so $\deg \omega = \deg \sigma = \abs{\mathcal E} \leq t$. For uniqueness, suppose there exist $\widetilde \sigma, \widetilde \omega$ with the properties (i), (ii). - Without loss of generality, we can assume $\deg \widetilde \sigma \leq \deg \sigma$. + WLOG, we can assume $\deg \widetilde \sigma \leq \deg \sigma$. $\sigma(X)$ has distinct nonzero roots, so $\omega(X) = -X\sigma'(X)$ is nonzero at these roots. Hence $\sigma, \omega$ are coprime polynomials. By property (ii), $\widetilde \sigma(X) \omega(X) = \sigma(X) \widetilde \omega(X)$ mod $X^{2t+1}$. @@ -502,7 +587,9 @@ \subsection{BCH codes} But $\sigma(X)$ and $\omega(X)$ are coprime, so $\sigma \mid \widetilde \sigma$, but $\deg \widetilde \sigma \leq \deg \sigma$ by assumption, so $\widetilde \sigma = \lambda \sigma$ for some $\lambda \in K$. By property (i), $\sigma(0) = \widetilde\sigma(0)$ hence $\lambda = 1$, giving $\widetilde \sigma = \sigma$. \end{proof} + Suppose that we receive $r(X)$ and wish to decode it. + \begin{itemize} \item Compute $\sum_{j=1}^{2t} r(\alpha^j) X^j$. \item Set $\sigma(X) = 1 + \sigma_1 X + \dots + \sigma_t X^t$, and compute the coefficients of $X^i$ for $t + 1 \leq i \leq 2t$ to obtain linear equations for $\sigma_1, \dots, \sigma_t$, which are of the form $\sum_0^t \sigma_j r(\alpha^{i-j}) = 0$. @@ -510,6 +597,7 @@ \subsection{BCH codes} \item Compute $\mathcal E = \qty{i \mid \sigma(\alpha^{-i}) = 0}$, and check that $\abs{\mathcal E} = \deg \sigma$. \item Set $e(X) = \sum_{i \in \mathcal E} X^i$, then $c(X) = r(X) + e(X)$, and check that $c$ is a codeword. \end{itemize} + \begin{example} Consider $n = 7$, and $X^7 - 1 = (X + 1)(X^3 + X + 1)(X^3 + X^2 + 1)$ in $\mathbb F_2[X]$. Let $g(X) = X^3 + X + 1$, so $h(X) = (X + 1)(X^3 + X^2 + 1) = X^4 + X^2 + X + 1$. @@ -532,6 +620,7 @@ \subsection{BCH codes} \end{example} \subsection{Shift registers} + \begin{definition} A \vocab{(general) feedback shift register} is a map $f \colon \mathbb F_2^d \to \mathbb F_2^d$ given by \begin{align*} @@ -541,6 +630,7 @@ \subsection{Shift registers} We say that the register has length $d$. The \vocab{stream} associated to an \vocab{initial fill} $(y_0, \dots, y_{d-1})$ is the sequence $y_0, \dots$ with $y_n = C(y_{n-d}, \dots, y_{n-1})$ for $n \geq d$. 
\end{definition} + \begin{definition} The general feedback shift register $f \colon \mathbb F_2^d \to \mathbb F_2^d$ is a \vocab{linear feedback shift register} if $C$ is linear, so \begin{align*} @@ -548,17 +638,22 @@ \subsection{Shift registers} \end{align*} We usually set $a_0 = 1$. \end{definition} + The stream produced by a linear feedback shift register is now given by the recurrence relation $y_n = \sum_{i=0}^{d-1} a_i y_{n-d+i}$. We can define the auxiliary polynomial $P(X) = X^d + a_{d-1} X^{d-1} + \dots + a_1 X + a_0$. We sometimes write $a_d = 1$, so $P(X) = \sum_{i=0}^d a_i X^i$. + \begin{definition} The \vocab{feedback polynomial} is $\check{P}(X) = a_0 X^d + \dots + a_{d-1} X + 1 = \sum_{i=0}^d a_{d-i} X^i$. A sequence $y_0, \dots$ of elements of $\mathbb F_2$ has \vocab{generating function} $\sum_{j=0}^\infty y_j X^j \in \mathbb F_2\Brackets{X}$. \end{definition} + \begin{theorem} The stream $(y_n)_{n \in \mathbb N}$ comes from a linear feedback shift register with auxiliary polynomial $P(X)$ iff its generating function is (formally) of the form $\frac{A(X)}{\check{P}(X)}$ with $A \in \mathbb F_2[X]$ s.t. $\deg A < \deg \check{P}$. \end{theorem} + Note that $\check{P}(X) = X^{\deg P}P(X^{-1})$. + \begin{proof} Let $P(X)$ and $\check{P}(X)$ be as above. We require @@ -570,6 +665,7 @@ \subsection{Shift registers} This holds iff $y_n = \sum_{i=0}^{d-1} a_i y_{n-d + i}$ for all $n \geq d$. This is precisely the form of a stream that arises from a linear feedback shift register with auxiliary polynomial $P$. \end{proof} + The problem of recovering the linear feedback shift register from its stream and the problem of decoding BCH codes both involve writing a power series as a quotient of polynomials. \subsection{The Berlekamp--Massey method} diff --git a/CodingAndCryptography/05_cryptography.tex b/CodingAndCryptography/05_cryptography.tex index 8eec002..2930460 100644 --- a/CodingAndCryptography/05_cryptography.tex +++ b/CodingAndCryptography/05_cryptography.tex @@ -216,7 +216,7 @@ \subsection{Rabin cryptosystem} \end{lemma} \begin{proof} Suppose $x_0$ is a solution, so $x_0^2 \equiv d$ mod $p$. - Without loss of generality we can assume $x_0 \not\equiv 0$, or equivalently, $x_0 \nmid p$. + WLOG we can assume $x_0 \not\equiv 0$, or equivalently, $x_0 \nmid p$. Then $x_0^2 \equiv d$ so $d^{2k-1} \equiv x_0^{2(2k-1)} \equiv x_0^{p-1} \equiv 1$. Hence, $\qty(d^k)^2 \equiv d$. \end{proof} @@ -281,7 +281,7 @@ \subsection{RSA cryptosystem} Hence $x^m \equiv 1$ mod $N$. But $m = 2^a b$, so setting $y = x^b$ mod $N$, we obtain $y^{2^a} \equiv 1$ mod $N$. In particular, $o_p(y)$ and $o_q(y)$ are powers of 2. - Since $x \in X$, $o_p(y) \neq o_q(y)$, so without loss of generality suppose $o_p(y) < o_q(y)$. + Since $x \in X$, $o_p(y) \neq o_q(y)$, so wlog suppose $o_p(y) < o_q(y)$. Let $o_p(y) = 2^t$, so $0 \leq t < a$. Then $y^{2^t} \equiv 1$ mod $p$, but $y^{2^t} \not\equiv 1$ mod $q$. So $(y^{2^t} - 1, N) = p$ as required. diff --git a/CodingAndCryptography/cc.pdf b/CodingAndCryptography/cc.pdf index fa172ae..3b5601a 100644 Binary files a/CodingAndCryptography/cc.pdf and b/CodingAndCryptography/cc.pdf differ diff --git a/LogicAndSetTheory/02_well_orderings.tex b/LogicAndSetTheory/02_well_orderings.tex index 761991c..8872606 100644 --- a/LogicAndSetTheory/02_well_orderings.tex +++ b/LogicAndSetTheory/02_well_orderings.tex @@ -685,7 +685,8 @@ \subsection{Ordinal arithmetic} Let $\lambda$ be a nonzero limit. We have $\alpha + \lambda = \sup\qty{\alpha + \gamma : \gamma < \lambda}$. 
But $\alpha + \gamma = \alpha +' \gamma$ for $\gamma < \lambda$, so $\alpha + \lambda = \sup\qty{\alpha +' \gamma : \gamma < \lambda}$. - As the set $\qty{\alpha +' \gamma : \gamma < \lambda}$ is nested, it is equal to its union, which is $\alpha +' \lambda$. + % As the sets $\alpha \sqcup \gamma$ are nested, their supremum is their union $\cup_{\gamma < \lambda} (\alpha \sqcup \gamma) = \alpha \sqcup \lambda$ which has OT $\alpha +' \lambda$. + As the set $\qty{\alpha +' \gamma : \gamma < \lambda}$ is nested, its supremum is equal to its union, which is $\alpha +' \lambda$. \end{proof} Synthetic definitions can be easier to work with if such definitions exist. However, there are many definitions that can only easily be represented inductively, and not synthetically. diff --git a/LogicAndSetTheory/04_predicate_logic.tex b/LogicAndSetTheory/04_predicate_logic.tex index c5a6fd9..d75a708 100644 --- a/LogicAndSetTheory/04_predicate_logic.tex +++ b/LogicAndSetTheory/04_predicate_logic.tex @@ -1,6 +1,6 @@ \section{Predicate Logic} \subsection{Languages} -In Propositional Logic we has a set $P$ of primitive propositions and then we combined them using logical connectives $\implies$, $\bot$, ($\wedge$, $\vee$, $\not$, $\top$) to form the language $L = L(P)$ of all (compound) propositions. +In Propositional Logic we had a set $P$ of primitive propositions and then we combined them using logical connectives $\implies$, $\bot$, ($\wedge$, $\vee$, $\neg$, $\top$) to form the language $L = L(P)$ of all (compound) propositions. We attached no meaning to primitive propositions. \\ \underline{Aim}: To develop languages to describe a wide range of mathematical theorems. @@ -89,13 +89,17 @@ \subsection{Languages} An occurrence of a variable $x$ in a formula $p$ is \vocab{bound} if it is inside the brackets of a $(\forall x)$ quantifier. Otherwise, we say the occurrence is \vocab{free}. -In the formula $(\forall x)(m(x,x) = e)$, each occurrence of $x$ is bound. -In $m(x,x) = e \Rightarrow (\exists y)(m(y,y) = x)$, the occurrences of $x$ are free and the occurrences of $y$ are bound. -In the formula $m(x,x) = e \Rightarrow (\forall x)(\forall y)(m(x,y) = m(y,x))$, the occurrences of $x$ on the left hand side are free, and the occurrences of $x$ on the right hand side are bound. +\begin{example} + In the formula $(\forall x)(m(x,x) = e)$, each occurrence of $x$ is bound. \\ + In $m(x,x) = e \Rightarrow (\exists y)(m(y,y) = x)$, the occurrences of $x$ are free and the occurrences of $y$ are bound. \\ + In the formula $m(x,x) = e \Rightarrow (\forall x)(\forall y)(m(x,y) = m(y,x))$, the occurrences of $x$ on the left hand side are free, and the occurrences of $x$ on the right hand side are bound. +\end{example} A \vocab{sentence} is a formula with no free variables. -For instance, $(\forall x)(m(x,x) = e)$ is a sentence, and $(\forall x)(m(x,x) \Rightarrow (\exists y)(m(y,y) = x))$ is a sentence. -In the language of posets, $(\forall x)(\exists y)(x \geq y \wedge \neg(x = y))$ is a sentence. +\begin{example} + For instance, $(\forall x)(m(x,x) = e)$ is a sentence, and $(\forall x)(m(x,x) = e \Rightarrow (\exists y)(m(y,y) = x))$ is a sentence. \\ + In the language of posets, $(\forall x)(\exists y)(x \geq y \wedge \neg(x = y))$ is a sentence. +\end{example} For a formula $p$, term $t$, and variable $x$, the \vocab{substitution} $p[t/x]$ is obtained from $p$ by replacing every free occurrence of $x$ with $t$.
For example, @@ -113,16 +117,23 @@ \subsection{Semantic implication} \item for each $\varphi \in \Pi$, a subset $\varphi_A \subseteq A^n$ where $n = \alpha(\varphi)$. \end{itemize} \end{definition} + \begin{remark} We will see later why the restriction that $A$ is nonempty is given here. \end{remark} + \begin{example} In the language of groups, an $L$-structure is a nonempty set $A$ with functions $m_A \colon A^2 \to A, i_A \colon A \to A, e_A \in A$. Such a structure may not be a group, as we have not placed any axioms on $A$. \end{example} + \begin{example} In the language of posets, an $L$-structure is a nonempty set $A$ with a relation $(\leq_A) \subseteq A^2$. + This is not yet a poset. \end{example} + +\underline{Next Step}: to define for a formula $p$ what it means that `$p$ is satisfied in $A$'. + We define the \vocab{interpretation} $p_A \in \qty{0,1}$ of a sentence $p$ in an $L$-structure $A$ as follows. \begin{itemize} \item The interpretation $t_A$ of a closed term $t$ in an $L$-structure $A$ is defined inductively as $(f\ t_1\dots t_n)_A = f_A({t_1}_A, \dots, {t_n}_A)$ for $f \in \Omega, \alpha(f) = n$, where $t_1, \dots, t_n$ are closed. @@ -138,8 +149,10 @@ \subsection{Semantic implication} \item $((\forall x)p)_A$ is 1 if $p[\overline a/x]$ is 1 for all $a \in A$ and 0 otherwise, where we add a constant symbol $\overline a$ to $L$ for a fixed $a \in A$ to form the language $L'$, and we make $A$ into an $L'$-structure by defining $\overline a_A = a$. \end{itemize} \end{itemize} + \begin{remark} For a formula $p$ with free variables, we can define $p_A$ to be the subset of $A^k$ where $k$ is the number of free variables, defined s.t. $x \in p_A$ iff the substitution of $x$ in $p$ is evaluated to 1. + \end{remark} \begin{definition} If $p_A = 1$, we say $p$ \vocab{holds} in $A$, or $p$ is \vocab{true} in $A$, or $A$ is a \vocab{model} of $p$. @@ -147,6 +160,7 @@ \subsection{Semantic implication} We say that $A$ is a \vocab{model} of a theory $T$ if $p_A = 1$ for all $p \in T$. For a theory $T$ and a sentence $p$, we say that $T \models p$, read $T$ \vocab{entails} or \vocab{semantically implies} $p$, if every model of $T$ is a model of $p$. \end{definition} + \begin{example} Let $L$ be the language of groups, and let \begin{align*} @@ -158,15 +172,18 @@ \subsection{Semantic implication} Note that this statement has two assertions; every $L$-structure that is a model of $T$ is a group, and that every group can be turned into an $L$-structure that models $T$. We say that $T$ \vocab{axiomatises} the theory of groups or the class of groups. \end{example} + \begin{example} Let $L$ be the language of posets, and $T$ be the poset axioms. Then $T$ axiomatises the class of posets. \end{example} + \begin{example} Let $L$ be the language of fields, so $\Omega = \qty{0, 1, +, \cdot, -}$ with $\alpha(0) = \alpha(1) = 0, \alpha(+) = \alpha(\cdot) = 2, \alpha(-) = 1$. $T$ is the usual field axioms, including the statement $(\forall x)(\neg (x = 0) \Rightarrow (\exists y)(x \cdot y = 1))$. Then $T$ entails the statement that inverses are unique: $(\forall x)(\neg (x = 0) \Rightarrow (\forall y)(\forall z) (y \cdot x = 1 \wedge z \cdot x = 1 \Rightarrow y = z))$. \end{example} + \begin{example} Let $L$ be the language of graphs, defined by $\Omega = \varnothing$ and $\Pi = \qty{a}$ where $\alpha(a) = 2$ is the adjacency relation. Define $T = \qty{(\forall x)(\neg a(x,x)), (\forall x)(\forall y)(a(x,y) \Rightarrow a(y,x))}$. 
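For instance, the $L$-structure $A = \qty{1, 2}$ with $a_A = \qty{(1,2), (2,1)}$ satisfies both sentences of $T$ and so is a model of $T$; it is simply the graph on two vertices with a single edge. On the other hand, the $L$-structure $A = \qty{1}$ with $a_A = \qty{(1,1)}$ is not a model of $T$, since $(\forall x)(\neg a(x,x))$ fails in it.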
diff --git a/LogicAndSetTheory/logicandsettheory.pdf b/LogicAndSetTheory/logicandsettheory.pdf index 86a8092..878ccf3 100644 Binary files a/LogicAndSetTheory/logicandsettheory.pdf and b/LogicAndSetTheory/logicandsettheory.pdf differ diff --git a/QuantumInfoAndComputing/04_quantum_computation.tex b/QuantumInfoAndComputing/04_quantum_computation.tex index dde2c62..1ce13c6 100644 --- a/QuantumInfoAndComputing/04_quantum_computation.tex +++ b/QuantumInfoAndComputing/04_quantum_computation.tex @@ -33,7 +33,7 @@ \subsection{Classical complexity} The class of languages for which the membership problem has a classical poly-time algorithm is called $\mathsf{P}$. The class of languages for which the membership problem has a randomised classical poly-time algorithm that gives the correct answer with probability at least $\frac{2}{3}$ is called $\mathsf{BPP}$, short for \vocab{bounded-error probabilistic poly-time}. The problem $\mathsf{FACTOR}(M,N)$ which determines if there is a nontrivial factor of $N$ that is at most $M$ does not lie in $\mathsf{BPP}$. -The best known runtime is $T(n) = O\qty(n^{\frac{1}{3}}\qty(\log n)^\frac{2}{3})$. +The best known runtime is $T(n) = \exp\qty(O\qty(n^{\frac{1}{3}}\qty(\log n)^\frac{2}{3}))$. A black box promise problem is a computational task where the input is a \vocab{black box} or \vocab{oracle} which can compute a Boolean function $f \colon B_m \to B_n$, and there is an \vocab{a priori promise} on $f$ restricting the possible values of $f$. For example, the black box promise problem for constant vs.\ balanced functions takes a function $f \colon B_n \to B$ such that $f$ is constant or \vocab{balanced}, in which case $f$ is equal to zero for exactly half of the $2^n$ possible inputs. diff --git a/QuantumInfoAndComputing/qic.pdf b/QuantumInfoAndComputing/qic.pdf index 180cb85..60c43b5 100644 Binary files a/QuantumInfoAndComputing/qic.pdf and b/QuantumInfoAndComputing/qic.pdf differ diff --git a/at.pdf b/at.pdf index 79e2665..f81336f 100644 Binary files a/at.pdf and b/at.pdf differ
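Returning to the black box promise problem for constant vs.\ balanced functions above, the following sketch (illustrative only; the function names are not from the notes) shows the obvious classical randomised strategy and why its error is bounded: query $f$ on a few independent uniformly random inputs, answer `balanced' as soon as two answers disagree, and answer `constant' otherwise. A constant $f$ is never misclassified, and a balanced $f$ is misclassified with probability at most $2^{-(k-1)}$ after $k$ queries, which is already below $\frac{1}{3}$ for $k = 3$.
\begin{verbatim}
import random

def classify_constant_vs_balanced(f, n, k=3):
    # Promise: f maps {0,1}^n -> {0,1} and is either constant or balanced.
    # Query f on k independent uniformly random inputs; answer "balanced"
    # as soon as two query results disagree, otherwise answer "constant".
    # If f is constant this is always correct; if f is balanced, all k
    # results agree with probability 2^(1-k), so the error is one-sided
    # and at most 1/4 for k = 3.
    values = set()
    for _ in range(k):
        x = tuple(random.randint(0, 1) for _ in range(n))
        values.add(f(x))
        if len(values) > 1:
            return "balanced"
    return "constant"

# Example use: the first-bit function is balanced on {0,1}^4.
print(classify_constant_vs_balanced(lambda x: x[0], n=4))
\end{verbatim}
By contrast, a deterministic classical algorithm may need $2^{n-1} + 1$ queries in the worst case to decide the promise problem with certainty.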