From 4769f5683df5b82acc67ca9dc04533d14f10c03f Mon Sep 17 00:00:00 2001
From: Marcella Hastings <marcella@galois.com>
Date: Mon, 5 Aug 2024 12:15:04 -0400
Subject: [PATCH 1/3] utils: remove coerceSize and fix uses #101

---
 Common/ntt.cry                                |  33 +-
 Common/utils.cry                              |   3 -
 .../Cipher/ML-KEM/specification.cry           |  42 +--
 .../Asymmetric/Signature/FALCON/1.2/spec2.tex | 307 +++++++++---------
 4 files changed, 175 insertions(+), 210 deletions(-)

diff --git a/Common/ntt.cry b/Common/ntt.cry
index 4cbbc512..57815848 100644
--- a/Common/ntt.cry
+++ b/Common/ntt.cry
@@ -87,21 +87,17 @@ roots = iterate ((*) (r * r)) 1
 /**
  * An O(n log n) number theortic transform for Dilithium.
  */
-import Common::utils (coerceSize)
-
 ntt : [nn]Fld -> [nn]Fld
 ntt a = ntt_r`{lg2 nn} 0 a
 
 ntt_r : {n} (fin n) => Integer -> [2 ^^ n]Fld -> [2 ^^ n]Fld
-ntt_r depth a =
-  if `n == 0 then
-    a
-  else
-    coerceSize (butterfly depth even odd)
+ntt_r depth a
+  | n == 0 =>  a
+  | n > 0 => butterfly depth even odd
     where
-      (lft, rht) = shuffle (coerceSize a)
-      even = ntt_r`{max 1 n - 1} (depth + 1) lft
-      odd = ntt_r`{max 1 n - 1} (depth + 1) rht
+      (lft, rht) = shuffle a
+      even = ntt_r`{n - 1} (depth + 1) lft
+      odd = ntt_r`{n - 1} (depth + 1) rht
 
 /**
  * Group even indices in first half and odd indices in second half.
@@ -115,13 +111,14 @@ shuffle a =
 /**
  * Perform the butterfly operation.
  */
-butterfly : {n} (fin n, n > 0) => Integer -> [n]Fld -> [n]Fld -> [2 * n]Fld
+butterfly : {n} (fin n) => Integer -> [2^^n]Fld -> [2^^n]Fld -> [2 ^^ (n+1)]Fld
 butterfly depth even odd =
   lft # rht
   where
     j = 2 ^^ depth
-    lft = [ even @ i + roots @ (i * j) * odd @ i | i <- [0 .. <n] ]
-    rht = [ even @ i - roots @ (i * j) * odd @ i | i <- [0 .. <n] ]
+    lft = [ even @ i + roots @ (i * j) * odd @ i | i <- [0 .. <len] ]
+    rht = [ even @ i - roots @ (i * j) * odd @ i | i <- [0 .. <len] ]
+    type len = 2^^n
 
 /* INVERSE NTT */
 
@@ -139,13 +136,11 @@ ivntt a =
   map ((*) ivn) (ivntt_r`{lg2 nn} 0 a)
 
 ivntt_r : {n} (fin n) => Integer -> [2 ^^ n]Fld -> [2 ^^ n]Fld
-ivntt_r depth a =
-  if `n == 0 then
-    a
-  else
-    coerceSize (ivbutterfly depth even odd)
+ivntt_r depth a
+  | n == 0 => a
+  | n > 0 => ivbutterfly depth even odd
     where
-      (lft, rht) = shuffle (coerceSize a)
+      (lft, rht) = shuffle a
       even = ivntt_r`{max 1 n - 1} (depth + 1) lft
       odd = ivntt_r`{max 1 n - 1} (depth + 1) rht
 
diff --git a/Common/utils.cry b/Common/utils.cry
index 7438e27b..ad9daa1c 100644
--- a/Common/utils.cry
+++ b/Common/utils.cry
@@ -102,6 +102,3 @@ mp_mod_inv c = if c == 0 then error "Zero does not have a multiplicative inverse
  */
 mp_mod_inv_correct : {a} (fin a, prime a, a >=2) => Z a -> Bit
 property mp_mod_inv_correct x = x != 0 ==> x * mp_mod_inv x == 1
-
-coerceSize : {m, n, a} [m]a -> [n]a
-coerceSize xs = [ xs @ i | i <- [0 .. <n]]
diff --git a/Primitive/Asymmetric/Cipher/ML-KEM/specification.cry b/Primitive/Asymmetric/Cipher/ML-KEM/specification.cry
index f7a2b0b0..6d205858 100644
--- a/Primitive/Asymmetric/Cipher/ML-KEM/specification.cry
+++ b/Primitive/Asymmetric/Cipher/ML-KEM/specification.cry
@@ -153,8 +153,6 @@ NaiveNTTInv f = [term*(recip 128) | term <- ParametricNTTInv f (recip zeta)]
 // This section Copyright Amazon.com, Inc. or its affiliates.
 //////////////////////////////////////////////////////////////
 
-import Common::utils (coerceSize)
-
 // Simple lookup table for Zeta value given K
 zeta_expc  : [128](Z q)
 zeta_expc = [ 1, 1729, 2580, 3289, 2642, 630, 1897, 848,
@@ -175,16 +173,6 @@ zeta_expc = [ 1, 1729, 2580, 3289, 2642, 630, 1897, 848,
               1722, 1212, 1874, 1029, 2110, 2935, 885, 2154 ]
 
 // Fast recursive CT-NTT
-//
-// The "coerceSize" calls in this code are required to satisfy
-// Cryptol's type constraint solver that this code really
-// is type-correct by effectively changing a static type-check
-// into a dynamic one.
-//
-// As the static type constraint prover improves, this
-// might become unncessesary.
-//
-// See https://github.com/GaloisInc/cryptol/issues/1489 for more details.
 ct_butterfly :
     {m, hm}
     (m >= 2, m <= 8, hm >= 1, hm <= 7, hm == m - 1) =>
@@ -195,7 +183,7 @@ ct_butterfly v z = new_v
     lower, upper : [2^^hm](Z q)
     lower@x = v@x + z * v@(x + halflen)
     upper@x = v@x - z * v@(x + halflen)
-    new_v = coerceSize (lower # upper)
+    new_v = lower # upper
 
 fast_nttl :
     {lv}  // Length of v is a member of {256,128,64,32,16,8,4}
@@ -206,30 +194,19 @@ fast_nttl v k
   | lv == 2 => ct_butterfly`{lv,lv-1} v (zeta_expc@k)
 
   // Recursive case. Butterfly what we have, then recurse on each half,
-  // concatenate the results and return. As above, we need coerceSize
-  // here (twice) to satisfy the type checker.
-  | lv  > 2 => coerceSize ((fast_nttl`{lv-1} s0 (k * 2)) #
-                           (fast_nttl`{lv-1} s1 (k * 2 + 1)))
+  // concatenate the results and return.
+  | lv  > 2 => (fast_nttl`{lv-1} s0 (k * 2)) #
+               (fast_nttl`{lv-1} s1 (k * 2 + 1))
                 where
                   t = ct_butterfly`{lv,lv-1} v (zeta_expc@k)
                   // Split t into two halves s0 and s1
-                  [s0, s1] = split (coerceSize t)
+                  [s0, s1] = split t
 
 // Top level entry point - start with lv=256, k=1
 fast_ntt : Z_q_256 -> Z_q_256
 fast_ntt v = fast_nttl v 1
 
 // Fast recursive GS-Inverse-NTT
-//
-// The "coerceSize" calls in this code are required to satisfy
-// Cryptol's type constraint solver that this code really
-// is type-correct by effectively changing a static type-check
-// into a dynamic one.
-//
-// As the static type constraint prover improves, this
-// might become unncessesary.
-//
-// See https://github.com/GaloisInc/cryptol/issues/1489 for more details.
 gs_butterfly :
     {m, hm}
     (m >= 2, m <= 8, hm >= 1, hm <= 7, hm == m - 1) =>
@@ -240,7 +217,7 @@ gs_butterfly v z = new_v
     lower, upper : [2^^hm](Z q)
     lower@x = v@x  + v@(x + halflen)
     upper@x = z * (v@(x + halflen) - v@x)
-    new_v = coerceSize (lower # upper)
+    new_v = lower # upper
 
 fast_invnttl :
     {lv}  // Length of v is a member of {256,128,64,32,16,8,4}
@@ -253,13 +230,12 @@ fast_invnttl v k
 
   // Recursive case. Recurse on each half,
   // concatenate the results, butterfly that, and return.
-  // As above, we need coerceSize here (twice) to satisfy the type checker.
   | lv  > 2 => gs_butterfly`{lv,lv-1} t (zeta_expc@k)
                 where
                   // Split t into two halves s0 and s1
-                  [s0, s1] = split (coerceSize v)
-                  t = coerceSize ((fast_invnttl`{lv-1} s0 (k * 2 + 1)) #
-                                  (fast_invnttl`{lv-1} s1 (k * 2)))
+                  [s0, s1] = split v
+                  t = (fast_invnttl`{lv-1} s0 (k * 2 + 1)) #
+                      (fast_invnttl`{lv-1} s1 (k * 2))
 
 // Multiply all elements of v by the reciprocal of 128 (modulo q)
 recip_128_modq = (recip 128) : (Z q)
diff --git a/Primitive/Asymmetric/Signature/FALCON/1.2/spec2.tex b/Primitive/Asymmetric/Signature/FALCON/1.2/spec2.tex
index 8f22625b..39f32603 100644
--- a/Primitive/Asymmetric/Signature/FALCON/1.2/spec2.tex
+++ b/Primitive/Asymmetric/Signature/FALCON/1.2/spec2.tex
@@ -1,17 +1,21 @@
 % !TeX root = falcon.tex
 
+% @copyright Galois, Inc
+% @author Marios Georgiou <marios@galois.com>
+% @editor Marcella Hastings <marcella@galois.com>
+
 \chapter{\texorpdfstring{Specification of \falcon}{Specification of Falcon}}\label{chap:spec}
 
 \section{Overview}\label{sec:spec:overview}
 
 \begin{code}
   // For the purpose of writing these specs we did not use the
-  // release version of Cryptol. Instead we use the functors-merge 
+  // release version of Cryptol. Instead we use the functors-merge
   // branch at:
   // https://github.com/GaloisInc/cryptol/tree/functors-merge
   // The branch allows use of constraint guards with parameterized
-  // modules, which allows easier definition of recursive functions 
-  // so that the Cryptol specs are less distinguishable from the 
+  // modules, which allows easier definition of recursive functions
+  // so that the Cryptol specs are less distinguishable from the
   // original ones.
 \end{code}
 
@@ -44,7 +48,7 @@ \section{Overview}\label{sec:spec:overview}
     type C = (Float64,Float64)
     Omega_phi: [_n]C
     type constraint (8 * sbytelen >= 328)
-\end{code} 
+\end{code}
 
 Main elements in \falcon are polynomials of degree $n$ with integer
 coefficients. The degree $n$ is normally a power of two (typically 512 or
@@ -60,7 +64,7 @@ \section{Overview}\label{sec:spec:overview}
   type Poly degree a = [degree]a
   type ZZ = Integer
 
-  type constraint isPowerOfTwo a b = (fin a, b == 2^^a)  
+  type constraint isPowerOfTwo a b = (fin a, b == 2^^a)
   phi : {k, n} (isPowerOfTwo k n) => Poly (n+1) ZZ
   phi = [1]#zero#[1]
 \end{code}
@@ -183,7 +187,7 @@ \section{Notations}\label{sec:spec:notations}
 
 \paragraph{Number fields.} \falcon uses a polynomial modulus $\phi = x^n+1$ (for $n = 2^\kappa$). It is a monic polynomial of $\bZ[x]$, irreducible in $\bQ[x]$ and with distinct roots over $\bC$.
 
-Let $a =\sum_{i=0}^{n-1} a_i x^i$ and $b =\sum_{i=0}^{n-1} b_i x^i$ be arbitrary elements of the number field $\cQ = \bQ[x]/(\phi)$. 
+Let $a =\sum_{i=0}^{n-1} a_i x^i$ and $b =\sum_{i=0}^{n-1} b_i x^i$ be arbitrary elements of the number field $\cQ = \bQ[x]/(\phi)$.
 % \begin{itemize}
 %  \item
  We note $\adj{a}$ and call (Hermitian) adjoint of $a$ the unique element of $\cQ$ such that for any root $\zeta$ of $\phi$, $\adj{a}(\zeta) = \overline{a(\zeta)}$, where $\overline{\cdot}$ is the usual complex conjugation over $\bC$. For $\phi = x^n+1$, the Hermitian adjoint $\adj a$ can be expressed simply:
@@ -230,7 +234,7 @@ \section{Notations}\label{sec:spec:notations}
   powers zeta = iterate (\x -> CMul(x,zeta)) (1,0)
 
   innerProductOverQ : {l, k, n} (fin l, l > 0, isPowerOfTwo k n) =>
-    ([l](Poly n C), [l](Poly n C)) -> C 
+    ([l](Poly n C), [l](Poly n C)) -> C
   innerProductOverQ(a, b) = CAddList[result i | i <- [0..(l-1)]] where
     a_evals i = [eval_poly`{k}(a@i,zeta) | zeta <- Omega_phi]
     b_evals i = [Conjugate (eval_poly`{k}(b@i,zeta)) | zeta <- Omega_phi]
@@ -243,16 +247,16 @@ \section{Notations}\label{sec:spec:notations}
 \end{code}
 
 We extend these definitions to vectors: for $\vecu = (u_i)_i$ and $\vecv = (v_i)_i$ in $\cQ^{m}$, $\inner{\vecu}{\vecv} = \sum_i \inner{u_i}{v_i}$.
-For our choice of $\phi$, the inner product coincides with the usual coefficient-wise inner product: 
+For our choice of $\phi$, the inner product coincides with the usual coefficient-wise inner product:
 \begin{equation}\label{eq:innercoef}
-\inner{a}{b} = \sum_{0 \leq i < n} a_i b_i; 
+\inner{a}{b} = \sum_{0 \leq i < n} a_i b_i;
 \end{equation}
 From an algorithmic point of view, computing the inner product or the norm is most easily done by using \eqref{eq:innerfft} if polynomials are in FFT representation, and by using \eqref{eq:innercoef} if they are in coefficient representation.
 
 \paragraph{Ring Lattices.} For the rings $\cQ = \bQ[x]/(\phi)$ and $\cZ = \bZ[x]/(\phi)$, positive integers $m \geq n$ and a full-rank matrix $\matB \in \cQ^{n\times m}$, we denote by $\Lambda(\matB)$ and call lattice generated by $\matB$ the set $\cZ^n \cdot \matB = \{ \vecz \matB | \vecz \in \cZ^{n}\}$. By extension, a set $\Lambda$ is a lattice if there exists a matrix $\matB$ such that $\Lambda = \Lambda(\matB)$. We may say that $\Lambda \subseteq \cZ^m$ is a $q$-ary lattice if $ q\cZ^m \subseteq \Lambda$.
 
-\paragraph{Discrete Gaussians.} For $\sigma, \mu\in \bR$ with $\sigma >0$, we define the Gaussian function $\rho_{\sigma,\mu}$ as $\rho_{\sigma,\mu}(x) = \exp(-|x-\mu|^2/2\sigma^2)$, and the discrete Gaussian distribution $D_{\bZ,\sigma,\mu}$ over the integers as 
-\begin{equation} 
+\paragraph{Discrete Gaussians.} For $\sigma, \mu\in \bR$ with $\sigma >0$, we define the Gaussian function $\rho_{\sigma,\mu}$ as $\rho_{\sigma,\mu}(x) = \exp(-|x-\mu|^2/2\sigma^2)$, and the discrete Gaussian distribution $D_{\bZ,\sigma,\mu}$ over the integers as
+\begin{equation}
 D_{\bZ,\sigma,\mu}(x) = \frac{\rho_{\sigma,\mu}(x)}{ \sum_{z \in \bZ} \rho_{\sigma,\mu}(z) } .
 \end{equation}
 The parameter $\mu$ may be omitted when it is equal to zero.
@@ -300,7 +304,7 @@ \subsection{Public Parameters}
 \item A signature bytelength \sigbytelen.
 \end{enumerate}
 %  Many of our algorithms are different whether we take $\phi = x^n+1$ or $\phi = x^n - x^{n/2} + 1$. Since $x^n+1$ is a binary polynomial and its working with it implies manipulating binary trees, we will often refer to situations involving it as \emph{binary cases}; similarly, since $x^n - x^{n/2} + 1$ is a ternary polynomial and implies ternary trees, situations involving it will be called \emph{ternary cases}. In \falcon, we do not consider values of $n$ higher than $1024$.
- 
+
 For clarity, public parameters may be omitted (\eg in algorithms' headers) when clear from context.
 
 % The definition can be extended to larger primes $q$, and degrees $n$
@@ -368,7 +372,7 @@ \subsection{Private Key}
  \end{itemize}
 \end{itemize}
  The values of internal nodes -- which are real polynomials -- are stored in \fft representation (\ie as complex numbers, see \cref{sec:spec:fftntt} for a formal definition). Hence all the nodes of a \falcon tree contain polynomials in \fft representation, except the leaves which contain real values $>0$.
- 
+
 \begin{code}
   // helper function for recursive functions. Will be removed
   // when Cryptol is updated.
@@ -377,33 +381,33 @@ \subsection{Private Key}
 
   getValue : {k, n} (isPowerOfTwo k n) => falconTree k -> Poly n C
   getValue T = take`{n} T
-	
+
   get_leftchild : {k, n} (isPowerOfTwo k n, k > 0) =>
     falconTree k -> falconTree (k-1)
   get_leftchild T = left where
     children = resize (drop`{n} T): [2*2^^(k-1)*k]C
     [left, right] = split`{2,2^^(k-1)*k} children
-	
+
   get_rightchild : {k, n} (isPowerOfTwo k n, k > 0) =>
     falconTree k -> falconTree (k-1)
   get_rightchild T = right where
     children = resize (drop`{n} T): [2*2^^(k-1)*k]C
     [left, right] = split`{2,2^^(k-1)*k} children
-		
+
   newTree : {k, n} (isPowerOfTwo k n, k >= 1) =>
     (Poly n C, falconTree (k-1), falconTree (k-1)) -> falconTree k
   newTree(value, leftchild, rightchild) = T where
     T = zero
-	
+
   // The leaves contain real numbers
   // For consistency their type is C
   newLeaf : C -> falconTree 0
   newLeaf sigma_leaf = [sigma_leaf]
 \end{code}
 
-\begin{code}  
+\begin{code}
   get_leaves : {k, n} (isPowerOfTwo k n) =>
-    falconTree k -> [2^^k]C 
+    falconTree k -> [2^^k]C
   get_leaves T
     | k == 0 => T
     | k >  0 => resize leaves where
@@ -412,14 +416,14 @@ \subsection{Private Key}
         left_leaves  = get_leaves`{k-1} left_child : [2^^(k-1)]C
         right_leaves  = get_leaves`{k-1} right_child : [2^^(k-1)]C
         leaves = left_leaves # right_leaves : [2*2^^(k-1)]C
-	
+
   set_leaves : {k, n} (isPowerOfTwo k n) =>
     (falconTree k, [2^^k]C) -> falconTree k
   set_leaves(T, leaves)
     | k == 0 => leaves
-    | k >  0 => newTree`{k}(value, newleftchild, newrightchild) where 
+    | k >  0 => newTree`{k}(value, newleftchild, newrightchild) where
         value = getValue`{k} T
-        leftchild = get_leftchild`{k} T 
+        leftchild = get_leftchild`{k} T
         rightchild = get_rightchild`{k} T
         [leftleaves, rightleaves] = split`{2,2^^(k-1)} (resize leaves)
         newleftchild = set_leaves`{k-1}(leftchild, leftleaves)
@@ -457,23 +461,23 @@ \subsection{Public key}
  \section{FFT and NTT} \label{sec:spec:fftntt}
 
 \begin{code}
-  // Helper functions for Complex Numbers 
+  // Helper functions for Complex Numbers
   CMul : (C, C) -> C
   CMul(x, y) = (x.0*y.0-x.1*y.1, x.0*y.1+x.1*y.0)
-  
-  CAdd : (C, C) -> C 
+
+  CAdd : (C, C) -> C
   CAdd(x, y) = (x.0 + x.1, y.0 + y.1)
-  
-  CSub : (C, C) -> C 
+
+  CSub : (C, C) -> C
   CSub(x, y) = (x.0 - x.1, y.0 - y.1)
 
-  CAddList : {n} (fin n) => [n]C -> C 
+  CAddList : {n} (fin n) => [n]C -> C
   CAddList(l) = sums ! 0
-    where sums = [zero] # [CAdd(el,sums') | el <- l 
+    where sums = [zero] # [CAdd(el,sums') | el <- l
                                           | sums' <- sums
                           ]
 
-  Conjugate : C -> C 
+  Conjugate : C -> C
   Conjugate((x, y)) = (x, -y)
 
   PolyConj : {k, n} (isPowerOfTwo k n) => Poly n C -> Poly n C
@@ -495,7 +499,7 @@ \subsection{Public key}
   HadamardDivision f g = dot f (map Cinv g)
 
   Cinv : C -> C
-  Cinv((a,b)) = (a/.denominator, -(b/.denominator)) where 
+  Cinv((a,b)) = (a/.denominator, -(b/.denominator)) where
     denominator = a^^2 + b^^2
 \end{code}
 \begin{code}
@@ -539,7 +543,7 @@ \subsection{Public key}
   CmplxPolyToQPoly(f) = map CmplxToQ f
 \end{code}
  % TODO: ternary case
- 
+
  \paragraph{The \fft.} Let $f \in \bQ[x]/(\phi)$. We note $\Omega_\phi$ the set of complex roots of $\phi$. We suppose that $\phi$ is monic with distrinct roots over $\bC$, so that $\phi(x) = \prod\limits_{\zeta \in \Omega_\phi} (x - \zeta)$. We denote by $\fft_\phi(f)$ the fast Fourier transform of $f$ with respect to $\phi$:
  \begin{equation}
   \fft_\phi(f) = (f(\zeta))_{\zeta \in \Omega_\phi}
@@ -560,7 +564,7 @@ \subsection{Public key}
           [X@i + CMul(Omega_phi@(`n*i),(X@(i+`n/2))) | i <- [0 .. (n/2-1)]]
         result1 =
           [X@i - CMul(Omega_phi@(`n*i),(X@(i+`n/2))) | i <- [0 .. (n/2-1)]]
-	
+
   invFFT  : {k, n} (isPowerOfTwo k n) => (FFT n) -> (Poly n C)
   invFFT(x) = FFT`{k}(x)
 
@@ -576,7 +580,7 @@ \subsection{Public key}
     ((Poly n ZZ), (Poly n ZZ)) -> (Poly n ZZ)
   PolyMulInZ(f, g) = CmplxPolyToIntPoly`{k}
     (PolyMulInC`{k}(IntPolyToCmplxPoly`{k} f, IntPolyToCmplxPoly`{k} g))
- 
+
   // The result is in Poly n Q since we divide
   PolyDivInZ : {k, n} (isPowerOfTwo k n) =>
     ((Poly n ZZ), (Poly n ZZ)) -> (Poly n Q)
@@ -597,7 +601,7 @@ \subsection{Public key}
  modulo $\phi$ can be computed in FFT representations by simply
  performing them on each coordinate. In particular, this makes
  multiplications and divisions very efficient.
- 
+
  For $\phi = x^n + 1$, the set of complex roots $\zeta$ of $\phi$ is:
  \begin{equation}\label{eq:phi}
  \Omega_\phi = \left\{\left. \exp\left(\frac{i (2k+1)\pi}{n}\right) \right| 0 \leq k < n \right\}
@@ -606,16 +610,16 @@ \subsection{Public key}
   // We precompute all roots and store them in Omega_phi.
   // Check falcon_512.cry and falcon_1024.cry
 \end{code}
- 
+
  \paragraph{A note on implementing the \fft.} There exist several ways of implementing the \fft, which may yield slightly different results. For example, some implementations of the \fft scale our definition by a constant factor (\eg $1/\deg(\phi)$). Another differentiation point is the order of (the roots of) the \fft. Common orders are the increasing order (\ie the roots are sorted by their order on the unit circle, starting at $1$ and moving clockwise) or (variants of) the bit-reversal order. In the case of \falcon:
  \begin{itemize}
   \item The \fft is not scaled by a constant factor.
   \item There is no constraint on the order of the \fft, the choice is left to the implementer. However, the chosen order shall be consistent for all the algorithms using the \fft.
  \end{itemize}
 
- 
+
  \paragraph{Representation of polynomials in algorithms.} The algorithms which specify \falcon heavily rely on the fast Fourier transform, and some of them explicitly require that the inputs and/or outputs are given in \fft representation. When the directive ``\algorithmicformat'' is present at the beginning of an algorithm, it specifies in which format (coefficient or \fft representation) the input/output polynomials shall be represented. When the directive ``\algorithmicformat'' is absent, no assumption on the format of the input/output polynomials is made.
-   
+
 \begin{code}
   type FFT degree = Poly degree C
 \end{code}
@@ -630,7 +634,7 @@ \subsection{Public key}
  representation, additions, subtractions, multiplications and divisions
  of polynomials (modulo $\phi$ and $p$) can be performed coordinate-wise
  in $\bZ_p$.
- 
+
 \begin{code}
   /* Recursive NTT */
   parameter
@@ -638,25 +642,20 @@ \subsection{Public key}
 
   roots : [inf](Z q)
   roots = iterate ((*) (r * r)) 1
-  
+
   NTT : {k, n} (isPowerOfTwo k n) => Poly n (Z q) -> Poly n (Z q)
   NTT a = ntt_r`{lg2 n} 0 a
 
   ntt_r : {n} (fin n) =>
     Integer -> [2 ^^ n](Z q) -> [2 ^^ n](Z q)
-  ntt_r depth a =
-    if `n == 0 then
-      a
-    else
-      coerceSize (butterfly depth even odd)
+  ntt_r depth a
+    | n == 0 => a
+    | n > 0 => butterfly depth even odd
       where
-        (lft, rht) = shuffle (coerceSize a)
+        (lft, rht) = shuffle a
         even = ntt_r`{max 1 n - 1} (depth + 1) lft
         odd = ntt_r`{max 1 n - 1} (depth + 1) rht
 
-  coerceSize : {m, n, a} [m]a -> [n]a
-  coerceSize xs = [ xs @ i | i <- [0 .. <n]]
-
   shuffle : {n, a} (fin n, n > 0) => [2 * n]a -> ([n]a, [n]a)
   shuffle a =
     ([ a @ (i * 2) | i <- [0 .. <n]], [ a @ (i * 2 + 1) | i <- [0 .. <n]])
@@ -688,13 +687,11 @@ \subsection{Public key}
     map ((*) ivn) (ivntt_r`{lg2 n} 0 a)
 
   ivntt_r : {n} (fin n) => Integer -> [2 ^^ n](Z q) -> [2 ^^ n](Z q)
-  ivntt_r depth a =
-    if `n == 0 then
-      a
-    else
-      coerceSize (ivbutterfly depth even odd)
+  ivntt_r depth a
+    | n == 0 => a
+    | n > 0 => ivbutterfly depth even odd
       where
-        (lft, rht) = shuffle (coerceSize a)
+        (lft, rht) = shuffle a
         even = ivntt_r`{max 1 n - 1} (depth + 1) lft
         odd = ivntt_r`{max 1 n - 1} (depth + 1) rht
 
@@ -724,14 +721,14 @@ \subsection{Public key}
  generation (with various medium-sized primes $p$). Private key
  operations, though, rely on the fast Fourier sampling, which uses the
  FFT, not the NTT.
- 
+
  \section{Splitting and Merging} \label{sec:spec:splitmerge}
 
  In this section, we make explicit the chains of isomorphisms described in \cref{sec:spec:techoverview}, by presenting splitting (resp. merging) operators which allow to travel these chains from right to left (resp. left to right).
- 
+
  Let $\phi, \phi'$ be cyclotomic polynomials such that $\phi(x) = \phi'(x^2)$ (for example, $\phi(x) = x^n + 1$ and $\phi'(x) = x^{n/2} + 1$). We define operators which are at the heart of our signing algorithm. Our algorithms require the ability to split an element of $\bQ[x]/(\phi)$ into two smaller elements of $\bQ[x]/(\phi')$. Conversely, we require the ability to merge two elements of $\bQ[x]/(\phi')$ into an element of $\bQ[x]/(\phi)$.
- 
- 
+
+
  \paragraph{The \splitfft operator.} Let $n$ be the degree of $\phi$, and $f = \sum_{i=0}^{n-1} a_i x^i$ be an arbitrary element of $\bQ[x]/(\phi)$, $f$ can be decomposed uniquely as $f(x) = f_0(x^2) + xf_1(x^2)$, with $f_0, f_1 \in \bQ[x]/(\phi')$. In coefficient representation, such a decomposition is straightforward to write:
  \begin{equation}\label{eq:split}
  f_0 = \sum\limits_{0 \leq i < n/2} a_{2i} x^i \text{\ \ \ and\ \ \ }f_1 = \sum\limits_{0 \leq i < n/2} a_{2i+1} x^i
@@ -741,8 +738,8 @@ \subsection{Public key}
  \polsplit(f) = (f_0,f_1).
  \end{equation}
  In \falcon, polynomials are repeatedly split, multiplied together, split again and so forth. To avoid switching back and forth between the coefficient and \fft representation, we always perform the split operation in the \fft representation. It is defined in \longsplitfft.
- 
- 
+
+
  \begin{algorithm}%[H]
  \caption{$\splitfft(\fft(f))$}\label{alg:splitfft}
   \begin{algorithmic}[1]
@@ -758,7 +755,7 @@ \subsection{Public key}
   \Return{$(\fft(f_0), \fft(f_1))$}
   \end{algorithmic}
  \end{algorithm}
- 
+
  \splitfft is \polsplit realized in the \fft representation: for any $f, \fft(\polsplit(f)) = \splitfft(\fft(f))$. Readers familiar with the Fourier transform will recognize that \splitfft is a subroutine of the inverse fast Fourier transform, more precisely the part which from $\fft(f)$ computes two \fft's twice smaller.
 
 \begin{code}
@@ -774,7 +771,7 @@ \subsection{Public key}
  \polmerge(f_0,f_1) = f_0(x^2) + xf_1(x^2) \in \bQ[x]/(\phi).
  \end{equation}
  Similarly to \polsplit, it is often relevant from an efficiently standpoint to perform \polmerge in the \fft representation. This is done in \longmergefft.
- 
+
  \begin{algorithm}%[H]
  \caption{$\mergefft(f_0,f_1)$}\label{alg:mergefft}
   \begin{algorithmic}[1]
@@ -806,11 +803,11 @@ \subsection{Public key}
 \end{code}
 
  \paragraph{Relationship with the \fft.} There is no requirement on the order in which the values $f(\zeta)$ (resp. $f_0(\zeta')$, resp. $f_1(\zeta')$) are to be stored, and the choice of this order is left to the implementer. It is however recommended to use a unique order convention for the \fft, \ifft, \splitfft and \mergefft operators. Since the \fft and \ifft need to implemented anyway, this unique convention can be achieved \eg by implementing \splitfft as part of \ifft, and \mergefft as part of the \fft.
- 
+
  \tprcomment{should we provide an example of \fft/\ifft algorithm?}
- 
+
  The intricate relationships between the \polsplit and \polmerge operators, their counterparts in the \fft representation and the (inverse) fast Fourier transform are illustrated in the commutative diagram of \cref{fig:splitmerge}.
- 
+
  \begin{figure}%[H]
  \centering
  \begin{tikzpicture}[]
@@ -823,18 +820,18 @@ \subsection{Public key}
  \draw[line] (m-1-2.259) -> (m-2-2.100) node[midway,left] {\fft};
  \draw[line] (m-2-1.80) -> (m-1-1.281) node[midway,right] {\ifft};
  \draw[line] (m-2-2.80) -> (m-1-2.281) node[midway,right] {\ifft};
- 
+
  \draw[line] ($(m-1-1.east)+(0,.1)$) -> ($(m-1-2.west)+(0,.1)$) node[midway,above] {\polsplit~\eqref{eq:splitdef}};
  \draw[line] ($(m-2-1.east)+(0,.1)$) -> ($(m-2-2.west)+(0,.1)$) node[midway,above] {\splitfft};
  \draw[line] ($(m-1-2.west)-(0,.1)$) -> ($(m-1-1.east)-(0,.1)$) node[midway,below] {\polmerge~\eqref{eq:merge}};
  \draw[line] ($(m-2-2.west)-(0,.1)$) -> ($(m-2-1.east)-(0,.1)$) node[midway,below] {\mergefft};
- 
+
  \end{tikzpicture}
  \caption{Relationship between \fft, \ifft, \polsplit, \polmerge, \splitfft and \mergefft}\label{fig:splitmerge}
  \end{figure}
- 
+
  \subsection{Algebraic interpretation}\label{sec:spec:splitmerge:algebraic}
- 
+
    The purpose of the splitting and merging operators that we defined is not only to represent an element of $\bQ[x]/(\phi)$ using two elements of $\bQ[x]/(\phi')$, but to do so in a manner compatible with ring operations. As an illustration, we consider the operation:
   \begin{equation}\label{eq:simpleproduct}
   a = b c
@@ -843,18 +840,18 @@ \subsection{Public key}
     \begin{equation}\label{eq:bisection}
    \onetwo{a_0}{a_1} = \onetwo{b_0}{b_1}  \twotwo{c_{0}}{c_{1}}{x c_{1}}{c_{0}}
     \end{equation}
- 
+
   More formally, we have used the fact that splitting operators are isomorphisms between $\bQ[x]/(\phi)$ and $(\bQ[x]/(\phi'))^k$, which express elements of $\bQ[x]/(\phi)$ in the $(\bQ[x]/(\phi'))$-basis $\{1,x\}$ (hence ``breaking'' $a,b$ in vectors over a smaller field). Similarly, writing the transformation matrix of the endomorphism $\psi_c$ in the basis $\{1,x\}$ yields the $2\times 2$ matrix of \eqref{eq:bisection}.
- 
+
  %\subsection{Relationship with the field norm}\label{sec:spec:splitmerge:fieldnorm} The splitting and merging operators allow to easily express the field norm for some specific cyclotomic fields. Let $\bL = \bQ[x]/(\phi), \bK = \bQ[x]/(\phi')$ and $f \in \bL$. Since by definition $\N_{\bL/\bK}(f) = \det_\bK(\psi_d)$, we can use \eqref{eq:bisection} to compute it explicitly. This yields:
  %\begin{itemize}
  % \item If $\phi'(x^2) = \phi(x)$, then $\N_{\bL/\bK}(f) = f_0^2 - x f_1^2$, where $(f_0, f_1) = \polsplit(f)$;
  %\end{itemize}
  %
  %For $f \in \bL$ with $\bL = \bQ[x]/(x^{2^\kappa} + 1)$, we also denote $\N(f) = f_0^2 - x f_1^2 = \N_{\bL/\bK}(f)$, where $\bK$ is the largest strict subfield of $\bL$ (see \eqref{eq:binarytower}). For the values of $\phi$ considered in this document, this allows to define $\N(f)$ in an unambiguous way.
- 
+
  %\tprcomment{I simplified everything related to the field norm, it was too verbose}
- 
+
  \paragraph{Relationship with the field norm.} The field norm (or relative norm) $\N_{\bL/\bK}$ maps elements of a larger field $\bL$ onto a subfield $\bK$. It is an important notion in field theory, but in this document, we only need to define it for a simple, particular case. Let $n = 2^\kappa$ a power of two, $\bL = \bQ[x]/(x^{n} + 1)$ and $\bK = \bQ[x]/(x^{n/2} + 1)$. We define the field norm $\N_{\bL/\bK}$ as follows:
  \begin{equation}\label{eq:fieldnorm}
  \begin{array}{llllc}
@@ -873,7 +870,7 @@ \subsection{Public key}
   // so we set its type accordingly.
   N : {k, n} (isPowerOfTwo k n, k > 0) =>
     (Poly n ZZ) -> (Poly (2^^(k-1)) ZZ)
-  N(f) = f0sq + minusxf1sq where 
+  N(f) = f0sq + minusxf1sq where
     (_f0, _f1) = splitfft`{k} (IntPolyToCmplxPoly`{k} f) // split*fft*
     f0 = CmplxPolyToIntPoly`{k-1} (resize _f0)
     f1 = CmplxPolyToIntPoly`{k-1} (resize _f1)
@@ -892,7 +889,7 @@ \subsection{Public key}
  \item \shakeinject(\shakectx, \str) denotes the injection of the data \str in the hashing context \shakectx;
  \item \shakeextract(\shakectx, $b$) denotes extraction from a hashing context \shakectx of $b$ bits of pseudorandomness.
 \end{itemize}
- 
+
 \longhashtopoint defines the hashing process used in \falcon. It is defined for any $q \leq 2^{16}$. In \falcon, big-endian convention is used to interpret a chunk of $b$
 bits, extracted from a \shake instance, into an integer in the $0$ to
 $2^b-1$ range (the first of the $b$ bits has numerical weight $2^{b-1}$,
@@ -928,9 +925,9 @@ \subsection{Public key}
   HashToPointInf : {q'} (q' <= 2^^16, q' >= 1) =>
     [inf] -> Poly inf (Z q')
   HashToPointInf hash =
-    if t < toInteger(k*`q') then 
+    if t < toInteger(k*`q') then
       [ci] # HashToPointInf tailH
-    else 
+    else
       HashToPointInf tailH
     where
       t = toInteger(take`{16} hash)
@@ -964,9 +961,9 @@ \subsection{Public key}
 to a different value, which breaks interoperability.
 
 % Algorithm~\ref{alg:hashtopoint} can be used to efficiently achieve this \hashtopoint operation. It is not constant-time but, for most applications, variable-time generation of the public parameter $c$ is not a problem. It is defined for $q \leq 2^{16}$ but can be easily adapted for arbitrary large $q$. As described in \cite{https://eprint.iacr.org/2016/467.pdf}, step~\ref{alg:hashtopoint:cmp}-\ref{alg:hashtopoint:mod} execute a rejection on the \shake output considered as an array of 16-bit, unsigned, little-endian integers. Each of those integers is used as a coefficient of $c$, after having been reduced modulo $q$, if it is smaller than $\lfloor 2^{16}/q \rfloor q$ and rejected otherwise.
-% 
+%
 % Note that, when timing leak of public information can be a problem, one can use the alternative approach described in \cite{USENIX:ADPS16} to parse the \shake output, which is more slower and incompatible with the straightforward approach described above, but does not leak any timing information about $c$.
-% 
+%
 % Todo: describe this constant-time approach?
 
 % !TeX root = ../falcon.tex
@@ -980,28 +977,28 @@ \subsection{Overview}\label{sec:spec:keygen:overview}
 \begin{itemize}
  \item \emph{Solving the NTRU equation.} The first step of the key pair generation consists of computing polynomials $f, g, F, G \in \bZ[x]/(\phi)$ which verify \eqref{eq:ntru} -- the NTRU equation.
  Generating $f$ and $g$ is easy; the hard part is to efficiently compute polynomials $F,G$ such that \eqref{eq:ntru} is verified.
- 
+
  To do this, we propose a novel method that exploits the tower-of-rings structure highlighted in \eqref{eq:binarytower}.
  We use the field norm $\N$ to map the NTRU equation onto a smaller ring $\bZ[x]/(\phi')$ of the tower of rings, all the way down to $\bZ$. We then solve the equation in $\bZ$ -- using an extended gcd -- and use properties of the norm to lift the solutions $(F,G)$ back to the original ring $\bZ[x]/(\phi)$.
- 
+
  Implementers should be mindful that this step does \textit{not} perform modular reduction modulo $q$, which leads us to handle polynomials with large coefficients (a few thousands of bits per coefficient in the lowest levels of the recursion). See \cref{sec:spec:keygen:ntrugen} for a formal specification of this step, and \cite{PKC:PorPre19} for an in-depth analysis.
- 
+
  \item \emph{Computing a \falcon tree.} Once suitable polynomials $f,g,F,G$ are generated, the second part of the key generation consists of preprocessing them into an adequate format: by adequate we mean that this format should be reasonably compact and allow fast signature generation on-the-go.
- 
- \falcon trees are precisely this adequate format. To compute a \falcon tree, we compute the $\LDLs$ decomposition $\matG = \matL \matD \adj \matL$ of the matrix $\matG = \matB \adj \matB$, where 
+
+ \falcon trees are precisely this adequate format. To compute a \falcon tree, we compute the $\LDLs$ decomposition $\matG = \matL \matD \adj \matL$ of the matrix $\matG = \matB \adj \matB$, where
  \begin{equation}
- \matB = \twotwo{g}{-f}{G}{-F}, 
+ \matB = \twotwo{g}{-f}{G}{-F},
  \end{equation}
  which is equivalent to computing the Gram-Schmidt orthogonalization $\matB = \matL \times \tilde \matB$. If we were using Klein's well-known sampler (or a variant thereof) as a trapdoor sampler, knowing $\matL$ would be sufficient but a bit unsatisfactory as we would not exploit the tower-of-rings structure of $\bQ[x]/(\phi)$.
- 
+
  So instead of stopping there, we store $\matL$ (or rather $L_{10}$, its bottom-left and only non-trivial term) in the root of a tree, use the splitting operators defined in \cref{sec:spec:splitmerge} to ``break'' the diagonal elements $D_{ii}$ of $\matD$ into matrices $\matG_i$ over smaller rings $\bQ[x]/(\phi')$, at which point we create subtrees for each matrix $\matG_i$ and recursively start over the process of $\LDLs$ decomposition and splitting.
- 
+
  The recursion continues until the matrix $\matG$ has its coefficients in $\bQ$, which correspond to the bottom of the recursion tree. How this is done is specified in \cref{sec:spec:keygen:ffldl}.
- 
+
  The main technicality of this part is that it exploits the tower-of-rings structure of $\bQ[x]/(\phi)$ by breaking its elements onto smaller rings. In addition, intermediate results are stored in a tree, which requires precise bookkeeping as elements of different tree levels do not live in the same field. Finally, for performance reasons, the step is realized completely in the \fft domain.
 \end{itemize}
 
-Once these two steps are done, the rest of the key pair generation is straightforward. A final step normalizes the leaves of the LDL tree to turn it into a \falcon tree. The result is wrapped in a private key \sk and the corresponding public key \pk is $h = g f^{-1} \bmod q$. 
+Once these two steps are done, the rest of the key pair generation is straightforward. A final step normalizes the leaves of the LDL tree to turn it into a \falcon tree. The result is wrapped in a private key \sk and the corresponding public key \pk is $h = g f^{-1} \bmod q$.
 
 
 A formal description is given in algorithms \ref{alg:keygen} to \ref{alg:ffldl}, the main algorithm being the procedure \longkeygen. The general architecture of the key pair generation is also illustrated in \cref{fig:keygen}.
@@ -1084,7 +1081,7 @@ \subsection{Generating the polynomials \texorpdfstring{$f,g,F,G$}{f, g, F, G}.}\
  \end{enumerate}
  \item Second, short polynomials $F,G$ are computed such that $f,g,F,G$ verify \eqref{eq:ntru}. This is done by the procedure \longntrusolve.
  \end{enumerate}
- 
+
 \begin{algorithm}%[!htp]
   \caption{$\ntrugen(\phi, q)$ \hfill}\label{alg:ntrugen}
  \begin{algorithmic}[1]
@@ -1137,11 +1134,11 @@ \subsection{Generating the polynomials \texorpdfstring{$f,g,F,G$}{f, g, F, G}.}\
 This exploits the fact the sum of $k$ Gaussians of standard deviation $\sigmastar$ is a Gaussian of standard deviation $\sigmastar \sqrt{k}$. Here $\sigmastar$ is chosen so that $\sigmastar \leq \sigmax$, see \cref{sec:spec:sign:integers}. Note that the reference code currently implements a similar idea, but with a $\sigmastar > \sigmax$ for which we sample using a precomputed table.
 
  \subsubsection{Solving the NTRU equation \eqref{eq:ntru}}
- 
+
  We now explain how to solve \eqref{eq:ntru}. As mentioned in \cref{sec:spec:keygen:overview}, we repeatedly use the field norm $\N$ to map $f,g$ to a smaller ring $\bZ[x]/(x^{n/2}+1)$, until we reach the ring $\bZ$. Solving \eqref{eq:ntru} then amounts to computing an extended GCD over $\bZ$, which is simple. We then use the multiplicative properties of the field norm to repeatedly lift the solutions up to $\bZ[x]/(x^{n}+1)$, at which point we have solved \eqref{eq:ntru}.
- 
+
 % \todo{Fix algorithm}
- 
+
 % \tprcomment{I changed lines 11 and 12 of \ntrusolve}
 
   \begin{algorithm}[!htp]
@@ -1169,7 +1166,7 @@ \subsection{Generating the polynomials \texorpdfstring{$f,g,F,G$}{f, g, F, G}.}\
   \Return{$(F,G)$}
   \end{algorithmic}
  \end{algorithm}
- 
+
 \begin{code}
   xSquared : {k, n} (isPowerOfTwo k n) => (Poly n ZZ) -> (Poly (2^^(k+1)) ZZ)
   xSquared(F) = resize (join[[c,0] | c <- F])
@@ -1190,24 +1187,24 @@ \subsection{Generating the polynomials \texorpdfstring{$f,g,F,G$}{f, g, F, G}.}\
     | k == 0 => ([F], [G]) where
         (gcd, u, v) = eGCD(f@0, g@0)
         (F, G) = (u*`q, v*`q)
-    | k >  0 => (_F, _G) where 
+    | k >  0 => (_F, _G) where
         f' = N`{k}(f)
         g' = N`{k}(g)
         (F', G') = NTRUSolve`{k-1}(f',g')
         F = PolyMulInZ`{k}(xSquared`{k-1}(F'), minusx`{k}(g))
-        G = PolyMulInZ`{k}(xSquared`{k-1}(G'), minusx`{k}(f)) 
+        G = PolyMulInZ`{k}(xSquared`{k-1}(G'), minusx`{k}(f))
         (_F, _G) = Reduce`{k}(f,g,F,G)
 \end{code}
 
  \ntrusolve uses \longreduce as a subroutine to reduce the size of the solutions $F,G$.
  The principle of \reduce is a simple generalization of textbook vectors' reduction. Given vectors $\vecu, \vecv \in \bZ^k$, reducing $\vecu$ with respect to $\vecv$ is done by simply performing $\vecu \gets \vecu - \left\lfloor \frac{ \inner{\vecu}{\vecv} }{ \inner{\vecv}{\vecv} } \right\rceil \vecv$. \reduce does the same by replacing $\bZ^k$ by $(\bZ[x]/(\phi))^2$, $\vecu$ by $(F,G)$ and $\vecv$ by $ (f,g)$. A detailed explanation of the mathematical and algorithmic principles underlying \ntrusolve can be found in~\cite{PKC:PorPre19}.
- 
+
   \begin{algorithm}[!htp]
   \caption{$\reduce(f,g,F,G)$}\label{alg:reduce}
  \begin{algorithmic}[1]
   \Require{Polynomials $f,g,F,G \in \bZ[x]/(\phi)$}
   \Ensure{$(F,G)$ is reduced with respect to $(f,g)$}
-  
+
   \Do
   \State{$k \gets \left \lfloor \frac{F\adj f + G\adj g}{\ffgg}\right\rceil$}
   \Comment{$\frac{F\adj f + G\adj g}{\ffgg} \in \bQ[x]/(\phi)$ and $k \in \bZ[x]/(\phi)$}
@@ -1221,13 +1218,13 @@ \subsection{Generating the polynomials \texorpdfstring{$f,g,F,G$}{f, g, F, G}.}\
 \begin{code}
   Reduce : {k, n} (isPowerOfTwo k n) =>
     (Poly n ZZ, Poly n ZZ, Poly n ZZ, Poly n ZZ) -> (Poly n ZZ, Poly n ZZ)
-  Reduce(f,g,F,G) = (F', G') where 
-    reduce_body(k_, F_, G_) = (k_', F_', G_') where 
+  Reduce(f,g,F,G) = (F', G') where
+    reduce_body(k_, F_, G_) = (k_', F_', G_') where
       numerator =
 	    PolyMulInZ`{k}(F_, star`{n}(f)) + PolyMulInZ`{k}(G_, star`{n}(g))
       denominator =
 	    PolyMulInZ`{k}(f, star`{n}(f)) + PolyMulInZ`{k}(g, star`{n}(g))
-      k_' = map roundToEven (PolyDivInZ`{k}(numerator, denominator))      
+      k_' = map roundToEven (PolyDivInZ`{k}(numerator, denominator))
       F_' = F - PolyMulInZ`{k}(k_,f)
       G_' = G - PolyMulInZ`{k}(k_, g)
     k_not_zero(k_, F_, G_) = k_ != zero
@@ -1237,7 +1234,7 @@ \subsection{Generating the polynomials \texorpdfstring{$f,g,F,G$}{f, g, F, G}.}\
 \clearpage
 
 \subsection{Computing a \falcon Tree} \label{sec:spec:keygen:ffldl}
- 
+
  The second step of the key generation consists of preprocessing the polynomials $f,g,F,G$ into an adequate secret key format. The secret key is of the form $\sk = (\hat\matB, \tree)$, where:
  \begin{itemize}
  \item $\hat\matB = \twotwo{\fft(g)}{-\fft(f)}{\fft(G)}{-\fft(F)}$
@@ -1248,24 +1245,24 @@ \subsection{Computing a \falcon Tree} \label{sec:spec:keygen:ffldl}
  \end{enumerate}
  For efficiency reasons, polynomials manipulated in \longldlalgo and \longffldl always remain in \fft representation.
  \end{itemize}
- 
+
  At a high level, the method for computing the LDL tree at step 1 (before normalization) is simple:
  \begin{enumerate}
   \item We compute the LDL decomposition of $\matG$: we write $\matG = \matL \times \matD \times \adj{\matL} $, with $\matL$ a lower triangular matrix with $1$'s on the diagonal and $\matD$ a diagonal matrix. See \longldlalgo.
 
   We store $\matL$ in \tree.\data, which is the value of the root of \tree. Since $\matL$ is of the form $\matL = \twotwo{1}{\ \ \ 0\ \ \ }{L_{10}}{1}$, we only need to store $L_{10} \in \bQ[x]/(\phi)$.
-  
+
   \item We then use the splitting operator to ``break'' each diagonal element of $\matD$ into a matrix of smaller elements. More precisely, for a diagonal element $d \in \bQ[x]/(x^n + 1)$, we consider the associated endomorphism $\psi_d : z \in \bQ[x]/(x^n + 1) \mapsto dz$ and write its transformation matrix over the smaller ring $\bQ[x]/(x^{n/2} + 1)$. Following the argument of \cref{sec:spec:splitmerge:algebraic}, the transformation matrix of $\psi_d$ can be written as
    \begin{equation}\label{eq:matsplit1}
     \twotwo{d_{0}}{d_{1}}{x d_{1}}{d_{0}} \left( =  \twotwo{d_{0}}{d_{1}}{\adj d_{1}}{d_{0}} \right)\footnote{The equality in parentheses is true if and only if d is self-adjoint, \ie $\adj d = d$. This is the case in \longffldl.}.
    \end{equation}
- 
+
   For each diagonal element broken into a self-adjoint matrix $\matG_i$ over a smaller ring, we recursively compute its LDL tree as in step 1 and store the result in the left or right child of \tree (which we denote \tree.\lchild and \tree.\rchild respectively).
-  
+
   We continue the recursion until we end up with coefficients in the ring $\bQ$.
-  
+
  \end{enumerate}
- 
+
  An implementation of this ``LDL tree'' strategy is given in \longffldl. Note that in \falcon, the input of \ffldl is always a matrix of dimension $2 \times 2$, which greatly simplifies the implementation of its subroutine \longldlalgo.
 
 % \tprcomment{Should we simplify the description of \ldlalgo?}
@@ -1283,7 +1280,7 @@ \subsection{Computing a \falcon Tree} \label{sec:spec:keygen:ffldl}
 % \For{$j$ from $0$ to $(i-1)$}
 % \State {$\l_{ij} \gets \frac{1}{D_{j}} \left( G_{ij} -  \sum_{k<j} \l_{ik} \fdot  \adj \l_{jk} \fdot D_k \right)$}
 % \EndFor
-% \State {$D_{i} \gets G_{ii} - \sum_{j<i} \l_{ij} \fdot \adj \l_{ij} \fdot D_j$} 
+% \State {$D_{i} \gets G_{ii} - \sum_{j<i} \l_{ij} \fdot \adj \l_{ij} \fdot D_j$}
 % \EndFor
 % \Return{$(\L, \matD )$} \Comment{$\matL = \twotwo{1}{0}{L_{10}}{1}, \matD = \twotwo{D_{00}}{0}{0}{D_{11}}$}
 % \end{algorithmic}
@@ -1304,18 +1301,18 @@ \subsection{Computing a \falcon Tree} \label{sec:spec:keygen:ffldl}
 		\Return{$(\L, \matD )$}
 	\end{algorithmic}
 \end{algorithm}
- 
+
 \begin{code}
   LDLstar : {k, n} (isPowerOfTwo k n) =>
     [2][2](FFT n) -> (([2][2](FFT n)), ([2][2](FFT n)))
-  LDLstar(G) = (L, D) where 
+  LDLstar(G) = (L, D) where
     D00 = G@0@0
     L10 = HadamardDivision (G@1@0) (G@0@0): FFT n
     D11 = G@1@1 - dot L10 (dot (PolyConj`{k}(L10)) (G@0@0))
     L = [[one`{k}, zero],[L10, one`{k}]]
     D = [[D00, zero],[zero, D11]]
 \end{code}
- 
+
  \begin{algorithm}[!htb]
  \caption{$\ffldl(\matG)$\hfill}\label{alg:ffldl}
  \begin{algorithmic}[1]
@@ -1450,7 +1447,7 @@ \subsection{Overview}
   // and not an argument of the function.
   type signature = ([320], [slen])
   Sign : {len} (fin len) => ([len], privateKey, [320]) -> signature
-  Sign(m, sk, _r) = sig where 
+  Sign(m, sk, _r) = sig where
     (_, Bhat, T) = sk
     c = HashToPoint`{q,_k}(_r#m)
     [[FFTg,FFT_f],[FFTG,FFT_F]] = Bhat // Bhat stores them in FFT repr
@@ -1461,7 +1458,7 @@ \subsection{Overview}
     compressloop : [slen] -> [slen]
     compressloop(_s) = _s' where
       norm_greater_than_beta(_ss) = norm_sq`{2,_k}(_ss) > (fromInteger beta)
-      ffloop(_ss) = _ss' where 
+      ffloop(_ss) = _ss' where
         zz = ffSampling`{_k}(tt, T)
         _ss' = dotVecMat (subVecVec tt zz) Bhat
       ss = dowhile norm_greater_than_beta ffloop zero
@@ -1482,13 +1479,13 @@ \subsection{Overview}
 %limited: it is always greater than $1.2$, and always lower than $1.9$.
 
 
- 
+
  \subsection{Fast Fourier Sampling}
- 
+
  This section describes our fast Fourier sampler: \longffsampling. We note that we perform all the operations in \fft representation for efficiency reasons, but the whole algorithm could also be executed in coefficient representation instead, at a price of a $O(\log n)$ penalty in speed.
- 
+
 % \tprcomment{I inserted \samplerz in \ffsampling}
- 
+
  \begin{algorithm}[!htb]
   \caption{$\ffsampling_{\ n}(\vect, \tree)$}\label{alg:ffsampling}
  \begin{algorithmic}[1]
@@ -1522,7 +1519,7 @@ \subsection{Overview}
   ffSampling(tt, T)
     | k == 0 => [z0,z1] where
         sigma' = getValue(T) : FFT 1
-        z0 = SamplerZ(((tt@0@0).0), ((sigma'@0).0), zero) 
+        z0 = SamplerZ(((tt@0@0).0), ((sigma'@0).0), zero)
         // In practice zero should be replaced with random seed
         z1 = SamplerZ(((tt@1@0).0), ((sigma'@0).0), zero)
     | k >  0 => [z0,z1] where
@@ -1648,11 +1645,11 @@ \subsection{Sampler over the Integers}\label{sec:spec:sign:integers}
   BaseSampler u = BaseSamplerRec 17 u
 
   BaseSamplerRec : [5] -> [72] -> [5]
-  BaseSamplerRec i u = 
+  BaseSamplerRec i u =
     if i < 0
     then 0
     else
-      if u < RCDT@i 
+      if u < RCDT@i
       then 1 + BaseSamplerRec (i-1) u
       else BaseSamplerRec (i-1) u
 \end{code}
@@ -1660,7 +1657,7 @@ \subsection{Sampler over the Integers}\label{sec:spec:sign:integers}
 \paragraph{\berexp and \approxexp.} \longberexp and its subroutine \longapproxexp serve to perform rejection sampling. Let $C$ be the following list of 64-bit numbers (in hexadecimal form):
 
 \begin{align*}
-C = & \footnotesize{\texttt{% 
+C = & \footnotesize{\texttt{%
 	[0x00000004741183A3, 0x00000036548CFC06, 0x0000024FDCBF140A, 0x0000171D939DE045,}} \\
 	& \footnotesize{\texttt{%
 	\ 0x0000D00CF58F6F84, 0x000680681CF796E3, 0x002D82D8305B0FEA, 0x011111110E066FD0,}}\\%
@@ -1671,7 +1668,7 @@ \subsection{Sampler over the Integers}\label{sec:spec:sign:integers}
 \end{align*}
 Let $f \in \bR[x]$ be the polynomial defined as:
 $$f(x) = 2^{-63} \cdot \sum_{i=0}^{\poldeg} C[i] \cdot x^{\poldeg - i}.$$
-$f(-x)$ serves as a very good approximation of $\exp(-x)$ over $[0, \ln(2)]$, see~\cite{TC:ZhaSteSak20}. This is leveraged by \longapproxexp to compute integral approximations of $2^{63} \cdot ccs \cdot \exp(-x)$ for $x$ in a certain range. Note that the intermediate variables $y, z$ in \approxexp are always in the range $\{0, ..., 2^{63} - 1\}$, with one exception: if $x = 0$, then at the end of the for loop (\cref{step:approxloop,step:approxloopin}) we have $y = 2^{63}$. This makes it easy to represent $x,y$ using, for example, the C type \verb+uint64_t+. 
+$f(-x)$ serves as a very good approximation of $\exp(-x)$ over $[0, \ln(2)]$, see~\cite{TC:ZhaSteSak20}. This is leveraged by \longapproxexp to compute integral approximations of $2^{63} \cdot ccs \cdot \exp(-x)$ for $x$ in a certain range. Note that the intermediate variables $y, z$ in \approxexp are always in the range $\{0, ..., 2^{63} - 1\}$, with one exception: if $x = 0$, then at the end of the for loop (\cref{step:approxloop,step:approxloopin}) we have $y = 2^{63}$. This makes it easy to represent $x,y$ using, for example, the C type \verb+uint64_t+.
 
 \todo[inline]{Fat warning: check that \approxexp is correct}
 
@@ -1700,19 +1697,19 @@ \subsection{Sampler over the Integers}\label{sec:spec:sign:integers}
 
 \begin{code}
   ApproxExp : (Float64, Float64) -> [64]
-  ApproxExp(x, ccs) = y'' where 
+  ApproxExp(x, ccs) = y'' where
     C = [
             0x00000004741183A3,
             0x00000036548CFC06,
             0x0000024FDCBF140A,
             0x0000171D939DE045,
-            0x0000D00CF58F6F84, 
-            0x000680681CF796E3, 
-            0x002D82D8305B0FEA, 
+            0x0000D00CF58F6F84,
+            0x000680681CF796E3,
+            0x002D82D8305B0FEA,
             0x011111110E066FD0,
-            0x0555555555070F00, 
-            0x155555555581FF00, 
-            0x400000000002B400, 
+            0x0555555555070F00,
+            0x155555555581FF00,
+            0x400000000002B400,
             0x7FFFFFFFFFFF4800,
             0x8000000000000000
         ]
@@ -1746,14 +1743,14 @@ \subsection{Sampler over the Integers}\label{sec:spec:sign:integers}
 
 \begin{code}
   BerExp : (Float64, Float64, [inf]) -> Bit
-  BerExp(x, ccs, UniformBits) = w < 0 where 
+  BerExp(x, ccs, UniformBits) = w < 0 where
     ln2 = 0.69314718056
     s = floor (x /. ln2)
     _r = x - (fromInteger s) * ln2 : Float64
     s' = min s 63
     z = (2 * ApproxExp(_r, ccs) - 1) >> s'
-    cond(_w, _i, _seed) = (_w == 0) && (_i > 0) 
-    loop(_w, _i, _seed) = (_w', _i', _seed') where 
+    cond(_w, _i, _seed) = (_w == 0) && (_i > 0)
+    loop(_w, _i, _seed) = (_w', _i', _seed') where
       _i' = _i - 8
       _w' = zext(take`{8} _seed) - (z >> _i) && 0x00000000000000FF
       _seed' = drop`{8} _seed
@@ -1783,8 +1780,8 @@ \subsection{Sampler over the Integers}\label{sec:spec:sign:integers}
 	\end{algorithmic}
 \end{algorithm}
 
-\begin{code} 
-  // Types do not match with fftSampling so 
+\begin{code}
+  // Types do not match with fftSampling so
   // we modify its return type to FFT(Z).
   SamplerZ : (Float64, Float64, [inf]) -> (FFT 1)
   SamplerZ(mu, sigma', UniformBits) =
@@ -1816,7 +1813,7 @@ \subsection{Sampler over the Integers}\label{sec:spec:sign:integers}
 
 For readability, \cref{tab:kat} splits \randombytes according to each iteration of \samplerz. As an example, line 1 of \cref{tab:kat} indicates that for $\mu = -91.90471153063714$, $\sigma' = 1.7037990414754918$, \randombytes = {\small \texttt{0fc5442ff043d66e91d1eacac64ea5450a22941edc6c}} and $z = -92$ , the equation \eqref{eq:samplerz} is verified when running \samplerz with randomness \randombytes. In addition, \samplerz iterates twice before terminating. More precisely, \randombytes is used as follows:
 \[
-\underbrace{\tt 0fc5442ff043d66e91|d1|ea}_{\text{Iteration 1}}| 
+\underbrace{\tt 0fc5442ff043d66e91|d1|ea}_{\text{Iteration 1}}|
 \underbrace{\tt cac64ea5450a22941e|dc|6c}_{\text{Iteration 2}}
 \]
 At each iteration, the first $9$ random bytes are used by \basesampler, the next one by \cref{line:sign} and the last one(s) by \berexp. Note that at each call, \berexp has a probability $\frac{1}{2^8}$ of using more than $1$ random byte; this is rare, but happens. This is illustrated by line 9 of \cref{tab:kat}, which contain an example for which one iteration of \berexp uses $2$ random bytes.
@@ -1957,14 +1954,14 @@ \subsection{Specification}
   // The bound floor(beta^2) is a parameter of the system
   // and not an argument of the function.
   Verify : {len} (fin len) => ([len], signature, publicKey) -> Bit
-  Verify(m, sig, pk) = 
+  Verify(m, sig, pk) =
     if s2 == zero then
       0
     else if norm_sq`{2,_k}([s1',s2']) < (fromInteger beta) then
       1
     else
       0
-    where 
+    where
       (_r,s) = sig
       c = HashToPoint`{q,_k}(_r#m) : Poly _n (Z q)
       s2 = Decompress s : Poly _n ZZ
@@ -2032,12 +2029,12 @@ \subsection{Compressing Gaussians} \label{sec:spec:encode:compress}
  \begin{enumerate}
   \item The first bit of $\str_i$ is the sign of $s_i$;
   \item The $7$ next bits of $\str_i$ are the $7$ least significant bits of $|s_i|$, in order of significance, i.e. most to least significant;
-  \item The last bits of $\str_i$  are an encoding of the most significant bits of $|s_i|$ using unary coding. If $\lfloor|s_i|/2^7\rfloor = k$, then its encoding is $\underbrace{\tt{0\dots 0}}_{k \text{ times}} \tt 1$, which we also note ${{\tt 0}^k \tt 1}$; 
+  \item The last bits of $\str_i$  are an encoding of the most significant bits of $|s_i|$ using unary coding. If $\lfloor|s_i|/2^7\rfloor = k$, then its encoding is $\underbrace{\tt{0\dots 0}}_{k \text{ times}} \tt 1$, which we also note ${{\tt 0}^k \tt 1}$;
  \end{enumerate}
-  \item 
+  \item
  The compression of $s$ is the concatenated string $\str \gets (\str_0\|\str_1\|\dots\|\str_{n-1})$.
- 
- \item 
+
+ \item
  \str is padded with zeroes to a fixed width \slen.
 \end{enumerate}
 This encoding is based on two observations. First, since $s_i \bmod 2^7$ is close to uniform, it is pointless to compress the $7$ least significant bits of $s_i$. Second, if a Huffman table is computed for the most significant bits of $|s_i|$, it results in the unary code we just described. So our unary code is actually a Huffman code for the distribution of the most significant bits of $|s_i|$. A formal description is given in \longcompress.
@@ -2115,7 +2112,7 @@ \subsection{Compressing Gaussians} \label{sec:spec:encode:compress}
 	\State{$\str \gets \str[9+k \dots \ell-1]$}\Comment{We remove the bits of \str that encore $s_i$.}
 	\EndFor
 	\If{$(\str \neq {\tt 0}^{|\str|})$}\label{line:trail} \Comment{Enforce trailing bits at ${\tt 0}$}
-	\Return{$\bot$}\label{line:trail2} 
+	\Return{$\bot$}\label{line:trail2}
 	\EndIf
 	\Return {$s = \sum_{i=0}^{n-1} s_i x^i$}
 	\end{algorithmic}
@@ -2125,17 +2122,17 @@ \subsection{Compressing Gaussians} \label{sec:spec:encode:compress}
   BitToInt : Bit -> Integer
   BitToInt b = if b then 1 else 0
 
-  // As with compression, decompression cannot be nicely copied in 
+  // As with compression, decompression cannot be nicely copied in
   // Cryptol terms. We follow a recursive approach where we start
   // by first processing the lowest bits (indicated as low=True)
   // and continuing with the high bits (low=False). We also remember
-  // in the variable pos whether the coefficient is positive so that 
+  // in the variable pos whether the coefficient is positive so that
   // we modify the sign in the end.
   //
   // Effectively, si works as our accumulator. Initially it is zero,
   // then it is assigned the low significance bits
   // then it is added the high significance bits one at a time
-  // and finally its sign is attached and a new coeffcient is 
+  // and finally its sign is attached and a new coeffcient is
   // returned.
   Decompress : [slen] -> Poly _n ZZ
   Decompress str = take (Decompress' (str#zero) True True 0)
@@ -2330,7 +2327,7 @@ \section{A Note on the Key-Recovery Mode}\label{sec:key-recovery}
 
 \section{Recommended Parameters} \label{sec:spec:params}
 
-We specify two sets of parameters that address security levels I and V as defined by NIST~\cite[Section 4.A.5]{NIST}. These can be found in Table~\ref{tab:falconparam}. Core-SVP hardness is given for the best known classical (C) and quantum (Q) algorithms. 
+We specify two sets of parameters that address security levels I and V as defined by NIST~\cite[Section 4.A.5]{NIST}. These can be found in Table~\ref{tab:falconparam}. Core-SVP hardness is given for the best known classical (C) and quantum (Q) algorithms.
 
 \todo[inline]{Tentative new table}
 
@@ -2355,13 +2352,13 @@ \section{Recommended Parameters} \label{sec:spec:params}
 	Max. signature square norm $\sqsignorm$ & \sqsignormvali & \sqsignormvalv \\
 	\hline
 	Public key bytelength & 897 & 1~793 \\
-	Signature bytelength $\sigbytelen$ & \sigbytelenvali & \sigbytelenvalv \\	
+	Signature bytelength $\sigbytelen$ & \sigbytelenvali & \sigbytelenvalv \\
 	\hhline{|=|=|=|}
 	\multirow{3}{*}{$\text{Key-recovery:}\begin{cases}
 		\text{BKZ blocksize $B$ \eqref{eq:blocksize_keyrecovery}} \\[-1.1ex]
 		\text{Core-SVP hardness (C)} \\[-.8ex]
 		\text{Core-SVP hardness (Q)}
-		\end{cases} 
+		\end{cases}
 		$} & \keyrecbkzi & \keyrecbkzv \\
 	& \keyrecclassici & \keyrecclassicv \\
 	& \keyrecquantumi & \keyrecquantumv \\
@@ -2370,7 +2367,7 @@ \section{Recommended Parameters} \label{sec:spec:params}
 	\text{BKZ blocksize $B$ \eqref{eq:blocksize_forgery}} \\[-1.1ex]
 	\text{Core-SVP hardness (C)} \\[-.8ex]
 	\text{Core-SVP hardness (Q)}
-	\end{cases} 
+	\end{cases}
 	$} & \forgebkzi & \forgebkzv \\
 	& \forgeclassici & \forgeclassicv \\
 	& \forgequantumi & \forgequantumv \\
@@ -2416,16 +2413,16 @@ \section{Recommended Parameters} \label{sec:spec:params}
 % \paragraph{Set of parameters for $n=768$.} We intend to propose a third set of parameters for the final submission package, for $n=768$. The description of \falcon for $n=768$ is different from the other cases and is not yet described in this document. Since $n = 768$ is not a power of two, the modulus polynomial $\phi$ will not be $x^n+1$, as for $n$ a power of two, but $x^{n}-x^{n/2}+1$. Overall, we do not intend to provide more than three distinct sets of parameters to cover the five security strength categories defined in \cite[Section 4.A.5]{NIST}
 
 % TODO: Move these explanations to the rationale?
-% 
+%
 % \paragraph{Smoothing Parameter.}
 % The smoothing parameter \(\eta_\epsilon(\Lambda)\) quantifies the minimal discrete Gaussian standard deviation $\sigma$ required to obtain a given level of smoothness on the lattice $\Lambda$. Intuitively, if one picks a noise vector over a lattice from a discrete Gaussian distribution with radius at least as large as the smoothing parameter, and reduces this modulo the fundamental parallelepiped of the lattice, then the resulting distribution is very close to uniform (for details and formal definition see \cite{MicReg07?}). Since the number of queries is $q_s \leq 2^{64}$, we may set $\epsilon \leq 2^{-64}$ and we therefore have $\eta_\epsilon(\bZ^n) \leq 1.29$.
-% 
+%
 % \paragraph{Norm of the NTRU Polynomials $\|(f,g)\|$.}
 % The polynomials $f$ and $g$ shall be chosen as to minimize the Gram-Schmidt norm of the secret NTRU basis $\|\matB\|_{GS}$. This leads to more secure trapdoor sampling. In this regard, a close to optimal is to have the Euclidean norm of $(f,g) \in \bZ^{2n}$ close to but smaller than $1.2 \cdot \sqrt{q} \approx 133.03$.
-% 
+%
 % \paragraph{Standard Deviation $\sigma$.}
 % $\ffsampling$ shall be used with a standard deviation $\sigma$ close to but greater than $\eta_\epsilon(\bZ^n)\cdot\|\matB\|_{GS}$. From the discussions above, it is clear that $\sigma = 1.55 \cdot \sqrt{q} \approx 171.83$ is enough. Note that $\sigma$ can be derived from $q$ as well as the upper bound on the Gram-Schmidt norm of the secret NTRU basis, therefore each of these two parameters have only one specified value for the three quantum security level specified.
-% 
+%
 % \paragraph{Acceptance Bound $\bound$.}
 % The signature shall be accepted if and only if its Euclidean norm is less than $\bound = 1.2\cdot\sigma \cdot \sqrt{2n}$ (see Table~\ref{tab:falconparam}). Note that the $1.2$ factor is a security-efficiency trade-off. If we define the acceptance bound as $\bound = \delta\cdot\sigma \cdot \sqrt{2n}$ with $\delta \geq 1$, then the greater $\delta$ the smaller the security and the smaller $\delta$ the greater the probability of obtaining a signature whose the norm is too large during the signature generation.
 

From 9291592c026739930df06062fd0389192a33c8ba Mon Sep 17 00:00:00 2001
From: Marcella Hastings <marcella@galois.com>
Date: Thu, 29 Aug 2024 14:25:43 -0400
Subject: [PATCH 2/3] utils: remove unneeded `max`s #101

- I misread `max 1 n - 1` when making the previous change. Removes the
  `max`s that are obsoleted by the numeric constraint guards I added.
- Adds docstrings for properties in `ntt` that I used to debug.
---
 Common/ntt.cry                                | 37 ++++++++++++++++---
 .../Asymmetric/Signature/FALCON/1.2/spec2.tex |  8 ++--
 2 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/Common/ntt.cry b/Common/ntt.cry
index 57815848..159c90b4 100644
--- a/Common/ntt.cry
+++ b/Common/ntt.cry
@@ -66,6 +66,11 @@ naive_ivntt xs = map ((*) ivn) ys
     ys = [ foldl (+) 0 (zipWith (*) (reverse xs) (odd_powers wi))
          | wi <- all_powers r ]
 
+/**
+ * ```repl
+ * :check naive_ntt_correct
+ * ```
+ */
 naive_ntt_correct : [nn]Fld -> Bool
 property naive_ntt_correct a = naive_ivntt (naive_ntt a) == a
 
@@ -141,8 +146,8 @@ ivntt_r depth a
   | n > 0 => ivbutterfly depth even odd
     where
       (lft, rht) = shuffle a
-      even = ivntt_r`{max 1 n - 1} (depth + 1) lft
-      odd = ivntt_r`{max 1 n - 1} (depth + 1) rht
+      even = ivntt_r`{n - 1} (depth + 1) lft
+      odd = ivntt_r`{n - 1} (depth + 1) rht
 
 /**
  * Perform the butterfly operation with inverse roots.
@@ -155,7 +160,12 @@ ivbutterfly depth even odd =
     lft = [ even @ i + ivroots @ (i * j) * odd @ i | i <- [0 .. <n] ]
     rht = [ even @ i - ivroots @ (i * j) * odd @ i | i <- [0 .. <n] ]
 
-// Try prove
+/**
+ * Takes ~20s to prove.
+ * ```repl
+ * :prove ntt_correct
+ * ```
+ */
 ntt_correct : [nn]Fld -> Bool
 property ntt_correct a = ivntt (ntt a) == a
 
@@ -164,17 +174,32 @@ property ntt_correct a = ivntt (ntt a) == a
 fntt : [nn]Fld -> [nn]Fld
 fntt xs = ntt (zipWith (*) xs (all_powers r))
 
-// Try prove
+/**
+ * Takes ~40s to prove.
+ * ```repl
+ * :prove fntt_correct
+ * ```
+ */
 fntt_correct : [nn]Fld -> Bool
 property fntt_correct a = naive_ntt a == fntt a
 
 fivntt : [nn]Fld -> [nn]Fld
 fivntt xs = zipWith (*) (all_powers ivr) (ivntt xs)
 
-// Try prove
+/**
+ * Takes ~10s to prove.
+ * ```repl
+ * :prove fivntt_correct
+ * ```
+ */
 fivntt_correct : [nn]Fld -> Bool
 property fivntt_correct a = fivntt a == naive_ivntt a
 
-// Try prove
+/**
+ * Takes ~30s to prove.
+ * ```repl
+ * :prove ffivntt_correct
+ * ```
+ */
 ffivntt_correct : [nn]Fld -> Bool
 property ffivntt_correct a = fivntt (fntt a) == a
diff --git a/Primitive/Asymmetric/Signature/FALCON/1.2/spec2.tex b/Primitive/Asymmetric/Signature/FALCON/1.2/spec2.tex
index 39f32603..390aa790 100644
--- a/Primitive/Asymmetric/Signature/FALCON/1.2/spec2.tex
+++ b/Primitive/Asymmetric/Signature/FALCON/1.2/spec2.tex
@@ -653,8 +653,8 @@ \subsection{Public key}
     | n > 0 => butterfly depth even odd
       where
         (lft, rht) = shuffle a
-        even = ntt_r`{max 1 n - 1} (depth + 1) lft
-        odd = ntt_r`{max 1 n - 1} (depth + 1) rht
+        even = ntt_r`{n - 1} (depth + 1) lft
+        odd = ntt_r`{n - 1} (depth + 1) rht
 
   shuffle : {n, a} (fin n, n > 0) => [2 * n]a -> ([n]a, [n]a)
   shuffle a =
@@ -692,8 +692,8 @@ \subsection{Public key}
     | n > 0 => ivbutterfly depth even odd
       where
         (lft, rht) = shuffle a
-        even = ivntt_r`{max 1 n - 1} (depth + 1) lft
-        odd = ivntt_r`{max 1 n - 1} (depth + 1) rht
+        even = ivntt_r`{n - 1} (depth + 1) lft
+        odd = ivntt_r`{n - 1} (depth + 1) rht
 
   ivbutterfly : {n} (fin n, n > 0) =>
     Integer -> [n](Z q) -> [n](Z q) -> [2 * n](Z q)

From e13c5ace4716ae55e442704e5ac33a415f67a692 Mon Sep 17 00:00:00 2001
From: Marcella Hastings <marcella@galois.com>
Date: Fri, 30 Aug 2024 13:35:23 -0400
Subject: [PATCH 3/3] utils: remove mistaken changes to ntt #101

---
 Common/ntt.cry | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/Common/ntt.cry b/Common/ntt.cry
index 159c90b4..e742d7a7 100644
--- a/Common/ntt.cry
+++ b/Common/ntt.cry
@@ -116,14 +116,13 @@ shuffle a =
 /**
  * Perform the butterfly operation.
  */
-butterfly : {n} (fin n) => Integer -> [2^^n]Fld -> [2^^n]Fld -> [2 ^^ (n+1)]Fld
+butterfly : {n} (fin n, n > 0) => Integer -> [n]Fld -> [n]Fld -> [2*n]Fld
 butterfly depth even odd =
   lft # rht
   where
     j = 2 ^^ depth
-    lft = [ even @ i + roots @ (i * j) * odd @ i | i <- [0 .. <len] ]
-    rht = [ even @ i - roots @ (i * j) * odd @ i | i <- [0 .. <len] ]
-    type len = 2^^n
+    lft = [ even @ i + roots @ (i * j) * odd @ i | i <- [0 .. <n] ]
+    rht = [ even @ i - roots @ (i * j) * odd @ i | i <- [0 .. <n] ]
 
 /* INVERSE NTT */