diff --git a/doc/source/flint.rst b/doc/source/flint.rst
index 6237c9f8b1..faafb352d1 100644
--- a/doc/source/flint.rst
+++ b/doc/source/flint.rst
@@ -59,17 +59,6 @@ The file ``flint.h`` contains various useful macros.
     Returns the sign of `x` where `x` is interpreted as a :type:`slong`, that
     is, returns `-1` if `x < 0`, `0` if `x = 0` and `1` if `x > 0`.
 
-.. function:: flint_bitcnt_t FLINT_BIT_COUNT(ulong x)
-
-    Returns the number of binary bits required to represent *x*. If *x* is zero
-    it returns *0*. This is an inline-function only.
-
-.. macro:: FLINT_FLOG2(x)
-           FLINT_CLOG2(x)
-
-    For `x \ge 1`, it returns `\lfloor \log_2 x \rfloor`
-    and `\lceil \log_2 x \rceil`, respectively.
-
 Integer types
 -----------------------------------------------
 
diff --git a/doc/source/longlong.rst b/doc/source/longlong.rst
index 3c6acd21cb..e7ee6df79f 100644
--- a/doc/source/longlong.rst
+++ b/doc/source/longlong.rst
@@ -3,7 +3,7 @@
 **longlong.h** -- support functions for multi-word arithmetic
 ===============================================================================
 
-Leading and trailing zeroes
+Bit manipulation
 -------------------------------------------------------------------------------
 
 .. macro:: flint_clz(x)
@@ -18,6 +18,17 @@ Leading and trailing zeroes
     As for ``flint_clz()``, but counts from the least significant end. If `x` is
     zero then the return value is undefined.
 
+.. function:: flint_bitcnt_t FLINT_BIT_COUNT(ulong x)
+
+    Returns the number of binary bits required to represent *x*. If *x* is
+    zero, it returns *0*. This is available only as an inline function.
+
+.. macro:: FLINT_FLOG2(x)
+           FLINT_CLOG2(x)
+
+    For `x \ge 1`, these return `\lfloor \log_2 x \rfloor`
+    and `\lceil \log_2 x \rceil`, respectively.
+
 Addition and subtraction
 -------------------------------------------------------------------------------
 
diff --git a/src/bernoulli/bound_2exp_si.c b/src/bernoulli/bound_2exp_si.c
index af0c25af0f..eb8741da36 100644
--- a/src/bernoulli/bound_2exp_si.c
+++ b/src/bernoulli/bound_2exp_si.c
@@ -9,6 +9,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
+#include "longlong.h"
 #include "bernoulli.h"
 
 const short bernoulli_bound_tab[256] = {
diff --git a/src/bool_mat/pow_ui.c b/src/bool_mat/pow_ui.c
index da20321c18..8bbb6f9831 100644
--- a/src/bool_mat/pow_ui.c
+++ b/src/bool_mat/pow_ui.c
@@ -9,6 +9,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
+#include "longlong.h"
 #include "bool_mat.h"
 
 void
diff --git a/src/crt_helpers.h b/src/crt_helpers.h
index de729d0dd9..4d54633f09 100644
--- a/src/crt_helpers.h
+++ b/src/crt_helpers.h
@@ -18,7 +18,7 @@
 # include <intrin.h>
 #endif
 
-#include "flint.h"
+#include "longlong.h"
 #include "templates.h"
 
 #ifdef __cplusplus
diff --git a/src/d_mat/init.c b/src/d_mat/init.c
index 4e4812a573..3463760027 100644
--- a/src/d_mat/init.c
+++ b/src/d_mat/init.c
@@ -10,6 +10,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
+#include "long_extras.h"
 #include "d_mat.h"
 
 void
@@ -18,13 +19,24 @@ d_mat_init(d_mat_t mat, slong rows, slong cols)
     slong i;
 
     if (rows != 0)
-        mat->rows = (double **) flint_malloc(rows * sizeof(double *));
+        mat->rows = flint_malloc(rows * sizeof(double *));
     else
         mat->rows = NULL;
 
-    if (rows != 0 && cols != 0)       /* Allocate space for r*c small entries */
+    mat->r = rows;
+    mat->c = cols;
+
+    if (rows != 0 && cols != 0)
     {
-        mat->entries = (double *) flint_calloc(flint_mul_sizes(rows, cols), sizeof(double));
+        slong num;
+        int of;
+
+        /* rows * cols must fit in an slong; report failure as an overflow */
+        of = z_mul_checked(&num, rows, cols);
+        if (of)
+            flint_throw(FLINT_OVERFLOW, "Overflow creating a %wd x %wd object\n", rows, cols);
+
+        mat->entries = flint_calloc(num, sizeof(double));
 
         for (i = 0; i < rows; i++)
             mat->rows[i] = mat->entries + i * cols;
@@ -35,7 +47,4 @@ d_mat_init(d_mat_t mat, slong rows, slong cols)
         for (i = 0; i < rows; i++)
             mat->rows[i] = NULL;
     }
-
-    mat->r = rows;
-    mat->c = cols;
 }
diff --git a/src/fft_small.h b/src/fft_small.h
index e5314e728d..6552dcbcc6 100644
--- a/src/fft_small.h
+++ b/src/fft_small.h
@@ -12,6 +12,7 @@
 #ifndef FFT_SMALL_H
 #define FFT_SMALL_H
 
+#include "longlong.h"
 #include "machine_vectors.h"
 
 #define LG_BLK_SZ 8
diff --git a/src/flint.h.in b/src/flint.h.in
index 726ccee444..8fa9fb55c6 100644
--- a/src/flint.h.in
+++ b/src/flint.h.in
@@ -207,8 +207,6 @@ typedef const ulong * nn_srcptr;
 # endif
 #endif
 
-#include "longlong.h"
-
 /* memory ********************************************************************/
 
 FLINT_WARN_UNUSED FLINT_MALLOC FLINT_RETURNS_NONNULL void * flint_malloc(size_t size);
@@ -366,25 +364,6 @@ FLINT_INLINE ulong n_randint(flint_rand_t state, ulong limit)
 #define FLINT_SGN(x) ((0 < (slong)(x)) - ((slong)(x) < 0))
 #define FLINT_SWAP(T, x, y) do { T _swap_t = (x); (x) = (y); (y) = _swap_t; } while(0)
 
-#define r_shift(in, shift) \
-    ((shift == FLINT_BITS) ? WORD(0) : ((in) >> (shift)))
-
-#define l_shift(in, shift) \
-    ((shift == FLINT_BITS) ? WORD(0) : ((in) << (shift)))
-
-/* Beware when using the unsigned return value in signed arithmetic */
-FLINT_FORCE_INLINE
-flint_bitcnt_t FLINT_BIT_COUNT(ulong x)
-{
-   flint_bitcnt_t zeros = FLINT_BITS;
-   if (x) zeros = flint_clz(x);
-   return FLINT_BITS - zeros;
-}
-
-#define FLINT_FLOG2(k)  (FLINT_BIT_COUNT(k) - 1)
-
-#define FLINT_CLOG2(k)  FLINT_BIT_COUNT((k) - 1)
-
 /* allocation macros *********************************************************/
 
 #define FLINT_ARRAY_ALLOC(n, T) (T *) flint_malloc((n)*sizeof(T))
@@ -463,20 +442,6 @@ typedef enum
 
 FLINT_NORETURN void flint_throw(flint_err_t exc, const char * msg, ...);
 
-/* checked multiplication ****************************************************/
-
-FLINT_INLINE slong flint_mul_sizes(slong x, slong y)
-{
-    ulong hi, lo;
-
-    umul_ppmm(hi, lo, (ulong) x, (ulong) y);
-
-    if (hi != 0 || lo > WORD_MAX)
-        flint_throw(FLINT_OVERFLOW, "Overflow creating size %wd x %wd object.\n", x, y);
-
-    return lo;
-}
-
 /* FLINT generic type definitions ********************************************/
 
 typedef struct
diff --git a/src/fmpq_mat/init.c b/src/fmpq_mat/init.c
index 2f8ebc33ff..8e93f16439 100644
--- a/src/fmpq_mat/init.c
+++ b/src/fmpq_mat/init.c
@@ -10,6 +10,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
+#include "long_extras.h"
 #include "fmpq_mat.h"
 
 void fmpq_mat_init(fmpq_mat_t mat, slong rows, slong cols)
@@ -17,13 +18,24 @@ void fmpq_mat_init(fmpq_mat_t mat, slong rows, slong cols)
     slong i;
 
     if (rows != 0)
-        mat->rows = (fmpq **) flint_malloc(rows * sizeof(fmpq *));
+        mat->rows = flint_malloc(rows * sizeof(fmpq *));
     else
         mat->rows = NULL;
 
+    mat->r = rows;
+    mat->c = cols;
+
     if (rows != 0 && cols != 0)
     {
-        mat->entries = (fmpq *) flint_calloc(flint_mul_sizes(rows, cols), sizeof(fmpq));
+        slong num;
+        int of;
+
+        /* rows * cols must fit in an slong; report failure as an overflow */
+        of = z_mul_checked(&num, rows, cols);
+        if (of)
+            flint_throw(FLINT_OVERFLOW, "Overflow creating a %wd x %wd object\n", rows, cols);
+
+        mat->entries = flint_calloc(num, sizeof(fmpq));
 
         /* Set denominators */
         for (i = 0; i < rows * cols; i++)
@@ -41,7 +53,4 @@ void fmpq_mat_init(fmpq_mat_t mat, slong rows, slong cols)
                 mat->rows[i] = NULL;
         }
     }
-
-    mat->r = rows;
-    mat->c = cols;
 }
diff --git a/src/fmpz.h b/src/fmpz.h
index a79a6f1c9e..12848a7bdb 100644
--- a/src/fmpz.h
+++ b/src/fmpz.h
@@ -20,6 +20,7 @@
 
 #include <gmp.h>
 #include "fmpz_types.h"
+#include "longlong.h"
 
 #ifdef __cplusplus
  extern "C" {
diff --git a/src/fmpz_lll/mpf-impl.c b/src/fmpz_lll/mpf-impl.c
index 8094adb349..743e8ad6e1 100644
--- a/src/fmpz_lll/mpf-impl.c
+++ b/src/fmpz_lll/mpf-impl.c
@@ -10,6 +10,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
+#include "long_extras.h"
 #include "gmpcompat.h"
 #include "mpf-impl.h"
 #include "fmpz.h"
@@ -108,10 +109,22 @@ _mpf_vec_dot2(mpf_t res, const mpf * vec1, const mpf * vec2, slong len2, flint_b
 void
 mpf_mat_init(mpf_mat_t mat, slong rows, slong cols, flint_bitcnt_t prec)
 {
+    mat->r = rows;
+    mat->c = cols;
+    mat->prec = prec;
+
     if (rows != 0 && cols != 0)
     {
         slong i;
-        mat->entries = flint_malloc(flint_mul_sizes(rows, cols) * sizeof(mpf));
+        slong num;
+        int of;
+
+        /* rows * cols must fit in an slong; report failure as an overflow */
+        of = z_mul_checked(&num, rows, cols);
+        if (of)
+            flint_throw(FLINT_OVERFLOW, "Overflow creating a %wd x %wd object\n", rows, cols);
+
+        mat->entries = flint_malloc(num * sizeof(mpf));
         mat->rows = flint_malloc(rows * sizeof(mpf *));
 
         for (i = 0; i < rows * cols; i++)
@@ -124,10 +137,6 @@ mpf_mat_init(mpf_mat_t mat, slong rows, slong cols, flint_bitcnt_t prec)
        mat->entries = NULL;
        mat->rows = NULL;
     }
-
-    mat->r = rows;
-    mat->c = cols;
-    mat->prec = prec;
 }
 
 void mpf_mat_clear(mpf_mat_t mat)
diff --git a/src/fmpz_mat/fflu.c b/src/fmpz_mat/fflu.c
index c1dce2f497..8a00ade89d 100644
--- a/src/fmpz_mat/fflu.c
+++ b/src/fmpz_mat/fflu.c
@@ -15,6 +15,8 @@
 
 #define E(j,k) fmpz_mat_entry(B,j,k)
 
+#define r_shift(in, c) (((c) == FLINT_BITS) ? WORD(0) : ((in) >> (c)))
+
 slong
 fmpz_mat_fflu(fmpz_mat_t B, fmpz_t den, slong * perm,
                             const fmpz_mat_t A, int rank_check)
diff --git a/src/fmpz_mat/init.c b/src/fmpz_mat/init.c
index cd93d80300..b31989f6ce 100644
--- a/src/fmpz_mat/init.c
+++ b/src/fmpz_mat/init.c
@@ -10,6 +10,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
+#include "long_extras.h"
 #include "fmpz_mat.h"
 
 void
@@ -18,13 +19,24 @@ fmpz_mat_init(fmpz_mat_t mat, slong rows, slong cols)
     slong i;
 
     if (rows != 0)
-        mat->rows = (fmpz **) flint_malloc(rows * sizeof(fmpz *));
+        mat->rows = flint_malloc(rows * sizeof(fmpz *));
     else
         mat->rows = NULL;
 
-    if (rows != 0 && cols != 0)       /* Allocate space for r*c small entries */
+    mat->r = rows;
+    mat->c = cols;
+
+    if (rows != 0 && cols != 0)
     {
-        mat->entries = (fmpz *) flint_calloc(flint_mul_sizes(rows, cols), sizeof(fmpz));
+        slong num;
+        int of;
+
+        /* rows * cols must fit in an slong; report failure as an overflow */
+        of = z_mul_checked(&num, rows, cols);
+        if (of)
+            flint_throw(FLINT_OVERFLOW, "Overflow creating a %wd x %wd object\n", rows, cols);
+
+        mat->entries = flint_calloc(num, sizeof(fmpz));
 
         for (i = 0; i < rows; i++)
             mat->rows[i] = mat->entries + i * cols;
@@ -35,9 +47,6 @@ fmpz_mat_init(fmpz_mat_t mat, slong rows, slong cols)
         for (i = 0; i < rows; i++)
             mat->rows[i] = NULL;
     }
-
-    mat->r = rows;
-    mat->c = cols;
 }
 
 void
diff --git a/src/fmpz_mat/mul.c b/src/fmpz_mat/mul.c
index fb59a2e8b1..72fb4e62e2 100644
--- a/src/fmpz_mat/mul.c
+++ b/src/fmpz_mat/mul.c
@@ -13,6 +13,10 @@
 #include "fmpz.h"
 #include "fmpz_mat.h"
 
+#if FLINT_USES_BLAS
+# include "longlong.h"
+#endif
+
 void _fmpz_mat_mul_small_1(fmpz_mat_t C, const fmpz_mat_t A, const fmpz_mat_t B)
 {
     slong ar, br, bc;
diff --git a/src/fmpz_mat/mul_blas.c b/src/fmpz_mat/mul_blas.c
index 853ab28e27..74e295d6a2 100644
--- a/src/fmpz_mat/mul_blas.c
+++ b/src/fmpz_mat/mul_blas.c
@@ -10,6 +10,7 @@
 */
 
 #include "fmpz_mat.h"
+#include "longlong.h"
 
 /* todo: squaring optimizations */
 
diff --git a/src/fmpz_mat/solve_fflu_precomp.c b/src/fmpz_mat/solve_fflu_precomp.c
index 3f09d242eb..bc7a623369 100644
--- a/src/fmpz_mat/solve_fflu_precomp.c
+++ b/src/fmpz_mat/solve_fflu_precomp.c
@@ -20,6 +20,8 @@
 #define BB(ii,jj) fmpz_mat_entry(B,(ii),(jj))
 #define LU(ii,jj) fmpz_mat_entry(FFLU,(ii),(jj))
 
+#define r_shift(in, c) (((c) == FLINT_BITS) ? WORD(0) : ((in) >> (c)))
+
 void
 fmpz_mat_set_perm(fmpz_mat_t X, const slong * perm, const fmpz_mat_t B)
 {
diff --git a/src/fmpz_mod_poly/frobenius_powers_2exp_precomp.c b/src/fmpz_mod_poly/frobenius_powers_2exp_precomp.c
index 00bd7a31bf..403b257844 100644
--- a/src/fmpz_mod_poly/frobenius_powers_2exp_precomp.c
+++ b/src/fmpz_mod_poly/frobenius_powers_2exp_precomp.c
@@ -11,6 +11,7 @@
 
 #include "fmpz_mod.h"
 #include "fmpz_mod_poly.h"
+#include "longlong.h"
 
 void fmpz_mod_poly_frobenius_powers_2exp_precomp(
            fmpz_mod_poly_frobenius_powers_2exp_t pow, const fmpz_mod_poly_t f,
diff --git a/src/fmpz_mpoly.h b/src/fmpz_mpoly.h
index 1282ce497d..0714db2949 100644
--- a/src/fmpz_mpoly.h
+++ b/src/fmpz_mpoly.h
@@ -19,6 +19,7 @@
 #define FMPZ_MPOLY_INLINE static inline
 #endif
 
+#include "longlong.h"
 #include "mpoly_types.h"
 
 #ifdef __cplusplus
diff --git a/src/fmpz_poly_mat/init.c b/src/fmpz_poly_mat/init.c
index e659a2e65e..af0985100e 100644
--- a/src/fmpz_poly_mat/init.c
+++ b/src/fmpz_poly_mat/init.c
@@ -9,6 +9,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
+#include "long_extras.h"
 #include "fmpz_poly.h"
 #include "fmpz_poly_mat.h"
 
@@ -18,13 +19,24 @@ fmpz_poly_mat_init(fmpz_poly_mat_t A, slong rows, slong cols)
     slong i;
 
     if (rows != 0)
-        A->rows = (fmpz_poly_struct **) flint_malloc(rows * sizeof(fmpz_poly_struct *));
+        A->rows = flint_malloc(rows * sizeof(fmpz_poly_struct *));
     else
         A->rows = NULL;
 
+    A->r = rows;
+    A->c = cols;
+
     if (rows != 0 && cols != 0)
     {
-        A->entries = (fmpz_poly_struct *) flint_malloc(flint_mul_sizes(rows, cols) * sizeof(fmpz_poly_struct));
+        slong num;
+        int of;
+
+        /* rows * cols must fit in an slong; report failure as an overflow */
+        of = z_mul_checked(&num, rows, cols);
+        if (of)
+            flint_throw(FLINT_OVERFLOW, "Overflow creating a %wd x %wd object\n", rows, cols);
+
+        A->entries = flint_malloc(num * sizeof(fmpz_poly_struct));
 
         for (i = 0; i < rows * cols; i++)
             fmpz_poly_init(A->entries + i);
@@ -41,9 +53,6 @@ fmpz_poly_mat_init(fmpz_poly_mat_t A, slong rows, slong cols)
                 A->rows[i] = NULL;
         }
     }
-
-    A->r = rows;
-    A->c = cols;
 }
 
 void
diff --git a/src/fmpz_poly_mat/mul_KS.c b/src/fmpz_poly_mat/mul_KS.c
index 0b078be36a..317ad7c745 100644
--- a/src/fmpz_poly_mat/mul_KS.c
+++ b/src/fmpz_poly_mat/mul_KS.c
@@ -9,7 +9,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
-#include "flint.h"
+#include "longlong.h"
 #include "fmpz_poly.h"
 #include "fmpz_poly_mat.h"
 #include "fmpz_mat.h"
diff --git a/src/fmpz_poly_mat/pow.c b/src/fmpz_poly_mat/pow.c
index 30496641bd..d71ef535d2 100644
--- a/src/fmpz_poly_mat/pow.c
+++ b/src/fmpz_poly_mat/pow.c
@@ -9,7 +9,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
-#include "flint.h"
+#include "longlong.h"
 #include "fmpz_poly.h"
 #include "fmpz_poly_mat.h"
 
diff --git a/src/fmpz_poly_mat/pow_trunc.c b/src/fmpz_poly_mat/pow_trunc.c
index fec2df17eb..09642fdf07 100644
--- a/src/fmpz_poly_mat/pow_trunc.c
+++ b/src/fmpz_poly_mat/pow_trunc.c
@@ -9,7 +9,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
-#include "flint.h"
+#include "longlong.h"
 #include "fmpz_poly.h"
 #include "fmpz_poly_mat.h"
 
diff --git a/src/fmpz_poly_mat/sqr_KS.c b/src/fmpz_poly_mat/sqr_KS.c
index 030073822b..bf875a4975 100644
--- a/src/fmpz_poly_mat/sqr_KS.c
+++ b/src/fmpz_poly_mat/sqr_KS.c
@@ -9,7 +9,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
-#include "flint.h"
+#include "longlong.h"
 #include "fmpz_poly.h"
 #include "fmpz_poly_mat.h"
 #include "fmpz_mat.h"
diff --git a/src/fq_mat_templates/init.c b/src/fq_mat_templates/init.c
index 59d71e8324..78662bceb3 100644
--- a/src/fq_mat_templates/init.c
+++ b/src/fq_mat_templates/init.c
@@ -11,50 +11,51 @@
 */
 
 #ifdef T
-
+#include "long_extras.h"
 #include "templates.h"
 
 void
-TEMPLATE(T, mat_init) (TEMPLATE(T, mat_t) mat, slong rows, slong cols,
+TEMPLATE(T, mat_init)(TEMPLATE(T, mat_t) mat, slong rows, slong cols,
                        const TEMPLATE(T, ctx_t) ctx)
 {
     slong i;
 
+    mat->r = rows;
+    mat->c = cols;
+
     if (rows != 0)
-        mat->rows = (TEMPLATE(T, struct) **) flint_malloc(rows
-			                       * sizeof(TEMPLATE(T, struct) *));
+        mat->rows = flint_malloc(rows * sizeof(TEMPLATE(T, struct) *));
     else
         mat->rows = NULL;
 
-    if (rows != 0 && cols != 0)       /* Allocate space for r*c small entries */
+    if (rows != 0 && cols != 0)
     {
         slong j;
+        slong num;
+        int of;
 
-	mat->entries = (TEMPLATE(T, struct) *) flint_malloc(flint_mul_sizes(rows, cols)
-                                                * sizeof(TEMPLATE(T, struct)));
+        /* rows * cols must fit in an slong; report failure as an overflow */
+        of = z_mul_checked(&num, rows, cols);
+        if (of)
+            flint_throw(FLINT_OVERFLOW, "Overflow creating a %wd x %wd object\n", rows, cols);
+
+        mat->entries = flint_malloc(num * sizeof(TEMPLATE(T, struct)));
 
         for (i = 0; i < rows; i++)
         {
             mat->rows[i] = mat->entries + i * cols;
             for (j = 0; j < cols; j++)
-            {
                 TEMPLATE(T, init) (mat->rows[i] + j, ctx);
-            }
         }
     }
     else
     {
         mat->entries = NULL;
-	if (rows != 0)
-	{
+        if (rows != 0)
+        {
             for (i = 0; i < rows; i++)
                 mat->rows[i] = NULL;
-	}
+        }
     }
-
-    mat->r = rows;
-    mat->c = cols;
 }
-
-
 #endif
diff --git a/src/fq_nmod_mpoly_factor/n_bpoly_fq_factor_smprime.c b/src/fq_nmod_mpoly_factor/n_bpoly_fq_factor_smprime.c
index a4cc35e13b..55da43afd4 100644
--- a/src/fq_nmod_mpoly_factor/n_bpoly_fq_factor_smprime.c
+++ b/src/fq_nmod_mpoly_factor/n_bpoly_fq_factor_smprime.c
@@ -9,6 +9,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
+#include "longlong.h"
 #include "fq_nmod.h"
 #include "nmod_mat.h"
 #include "fmpz_poly_factor.h"
diff --git a/src/gmpcompat-longlong.h.in b/src/gmpcompat-longlong.h.in
index 2c905a8d5c..7eb489a172 100644
--- a/src/gmpcompat-longlong.h.in
+++ b/src/gmpcompat-longlong.h.in
@@ -13,7 +13,7 @@
 #define GMP_COMPAT_H
 
 #include <gmp.h>
-#include "flint.h"
+#include "longlong.h"
 
 #define FLINT_MPZ_REALLOC(z, len)       \
     ((len) > ((z)->_mp_alloc)           \
diff --git a/src/gmpcompat.h.in b/src/gmpcompat.h.in
index 3ed3980e3a..2973ee9f3d 100644
--- a/src/gmpcompat.h.in
+++ b/src/gmpcompat.h.in
@@ -13,7 +13,7 @@
 #define GMP_COMPAT_H
 
 #include <gmp.h>
-#include "flint.h"
+#include "longlong.h"
 
 #define FLINT_MPZ_REALLOC(z, len)       \
     ((len) > ((z)->_mp_alloc)           \
diff --git a/src/gr_mat/init.c b/src/gr_mat/init.c
index 79bfd71f80..ebc9a14528 100644
--- a/src/gr_mat/init.c
+++ b/src/gr_mat/init.c
@@ -9,6 +9,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
+#include "long_extras.h"
 #include "gr_mat.h"
 
 void
@@ -18,6 +19,9 @@ gr_mat_init(gr_mat_t mat, slong rows, slong cols, gr_ctx_t ctx)
 
     sz = ctx->sizeof_elem;
 
+    mat->r = rows;
+    mat->c = cols;
+
     if (rows != 0)
         mat->rows = flint_malloc(rows * sizeof(gr_ptr));
     else
@@ -25,7 +29,15 @@ gr_mat_init(gr_mat_t mat, slong rows, slong cols, gr_ctx_t ctx)
 
     if (rows != 0 && cols != 0)
     {
-        mat->entries = (gr_ptr) flint_malloc(flint_mul_sizes(rows, cols) * sz);
+        slong num;
+        int of;
+
+        /* rows * cols must fit in an slong; report failure as an overflow */
+        of = z_mul_checked(&num, rows, cols);
+        if (of)
+            flint_throw(FLINT_OVERFLOW, "Overflow creating a %wd x %wd object\n", rows, cols);
+
+        mat->entries = flint_malloc(num * sz);
 
         _gr_vec_init(mat->entries, rows * cols, ctx);
 
@@ -38,7 +50,4 @@ gr_mat_init(gr_mat_t mat, slong rows, slong cols, gr_ctx_t ctx)
         for (i = 0; i < rows; i++)
             mat->rows[i] = NULL;
     }
-
-    mat->r = rows;
-    mat->c = cols;
 }
diff --git a/src/gr_poly/evaluate_vec_fast.c b/src/gr_poly/evaluate_vec_fast.c
index 1d6c4224d2..6ddef31af3 100644
--- a/src/gr_poly/evaluate_vec_fast.c
+++ b/src/gr_poly/evaluate_vec_fast.c
@@ -9,6 +9,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
+#include "longlong.h"
 #include "gr_vec.h"
 #include "gr_poly.h"
 
diff --git a/src/gr_poly/hgcd.c b/src/gr_poly/hgcd.c
index 1eafaa7779..98c2abb5e9 100644
--- a/src/gr_poly/hgcd.c
+++ b/src/gr_poly/hgcd.c
@@ -11,6 +11,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
+#include "longlong.h"
 #include "gr_poly.h"
 #include "gr_vec.h"
 
diff --git a/src/gr_poly/pow_ui_binexp.c b/src/gr_poly/pow_ui_binexp.c
index 0f530e19cd..7ae26c001e 100644
--- a/src/gr_poly/pow_ui_binexp.c
+++ b/src/gr_poly/pow_ui_binexp.c
@@ -9,6 +9,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
+#include "longlong.h"
 #include "gr_vec.h"
 #include "gr_poly.h"
 
diff --git a/src/long_extras.h b/src/long_extras.h
index c916714399..31fa391829 100644
--- a/src/long_extras.h
+++ b/src/long_extras.h
@@ -18,7 +18,7 @@
 #define LONG_EXTRAS_INLINE static inline
 #endif
 
-#include "flint.h"
+#include "longlong.h"
 
 #ifdef __cplusplus
  extern "C" {
@@ -32,19 +32,29 @@ size_t z_sizeinbase(slong n, int b);
 
 LONG_EXTRAS_INLINE int z_mul_checked(slong * a, slong b, slong c)
 {
-    /* TODO __builtin_mul_overflow */
+    /* Stores the low word of b * c in *a; a nonzero return means overflow.
+       GCC has the generic builtin from 5.x; Clang assumed recent (confirm). */
+#if (defined(__GNUC__) && __GNUC__ >= 5) || defined(__clang__)
+    return __builtin_mul_overflow(b, c, a);
+#else
 	ulong ahi, alo;
 	smul_ppmm(ahi, alo, b, c);
 	*a = alo;
 	return FLINT_SIGN_EXT(alo) != ahi;
+#endif
 }
 
 LONG_EXTRAS_INLINE int z_add_checked(slong * a, slong b, slong c)
 {
-    /* TODO __builtin_add_overflow */
+    /* Stores the wrapped sum in *a; a nonzero return means overflow. The
+       fallback adds in ulong because signed overflow is undefined. */
+#if (defined(__GNUC__) && __GNUC__ >= 5) || defined(__clang__)
+    return __builtin_add_overflow(b, c, a);
+#else
     int of = (b > 0 && c > WORD_MAX - b) || (b < 0 && c < WORD_MIN - b);
-    *a = b + c;
+    *a = (slong) ((ulong) b + (ulong) c);
     return of;
+#endif
 }
 
 LONG_EXTRAS_INLINE
diff --git a/src/longlong.h b/src/longlong.h
index 3fc7808923..5704a570b4 100644
--- a/src/longlong.h
+++ b/src/longlong.h
@@ -19,6 +19,8 @@
 #ifndef FLINT_LONGLONG_H
 #define FLINT_LONGLONG_H
 
+#include "flint.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -92,6 +94,18 @@ static inline int flint_ctz(ulong x)
 }
 #endif
 
+/* Beware when using the unsigned return value in signed arithmetic */
+FLINT_FORCE_INLINE
+flint_bitcnt_t FLINT_BIT_COUNT(ulong x)
+{
+    flint_bitcnt_t zeros = FLINT_BITS;
+    if (x) zeros = flint_clz(x);
+    return FLINT_BITS - zeros;
+}
+
+#define FLINT_FLOG2(k) (FLINT_BIT_COUNT(k) - 1)
+#define FLINT_CLOG2(k) FLINT_BIT_COUNT((k) - 1)
+
 /* Addition and subtraction */
 #if !defined(add_ssaaaa)
 # define add_ssaaaa(s1, s0, a1, a0, b1, b0) \
diff --git a/src/mpfr_mat/init.c b/src/mpfr_mat/init.c
index 13237ff493..4904cf3622 100644
--- a/src/mpfr_mat/init.c
+++ b/src/mpfr_mat/init.c
@@ -10,19 +10,29 @@
 */
 
 #include <mpfr.h>
-#include "flint.h"
+#include "long_extras.h"
 #include "mpfr_mat.h"
 
 void
 mpfr_mat_init(mpfr_mat_t mat, slong rows, slong cols, mpfr_prec_t prec)
 {
+    mat->r = rows;
+    mat->c = cols;
+    mat->prec = prec;
 
-    if (rows != 0 && cols != 0)       /* Allocate space for r*c small entries */
+    if (rows != 0 && cols != 0)
     {
         slong i;
-        mat->entries =
-            (__mpfr_struct *) flint_malloc(flint_mul_sizes(rows, cols) * sizeof(__mpfr_struct));
-        mat->rows = (__mpfr_struct **) flint_malloc(rows * sizeof(__mpfr_struct *));  /* Initialise rows */
+        slong num;
+        int of;
+
+        /* rows * cols must fit in an slong; report failure as an overflow */
+        of = z_mul_checked(&num, rows, cols);
+        if (of)
+            flint_throw(FLINT_OVERFLOW, "Overflow creating a %wd x %wd object\n", rows, cols);
+
+        mat->entries = flint_malloc(num * sizeof(__mpfr_struct));
+        mat->rows = flint_malloc(rows * sizeof(__mpfr_struct *));
 
         for (i = 0; i < rows * cols; i++)
             mpfr_init2(mat->entries + i, prec);
@@ -31,8 +41,4 @@ mpfr_mat_init(mpfr_mat_t mat, slong rows, slong cols, mpfr_prec_t prec)
     }
     else
         mat->entries = NULL;
-
-    mat->r = rows;
-    mat->c = cols;
-    mat->prec = prec;
 }
diff --git a/src/mpn_extras.h b/src/mpn_extras.h
index 0393e85043..64ffc2bb7f 100644
--- a/src/mpn_extras.h
+++ b/src/mpn_extras.h
@@ -22,7 +22,7 @@
 #endif
 
 #include <gmp.h>
-#include "flint.h"
+#include "longlong.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/src/mpoly.h b/src/mpoly.h
index 45ac5175ef..14dfbeb92f 100644
--- a/src/mpoly.h
+++ b/src/mpoly.h
@@ -21,6 +21,7 @@
 
 #include <string.h>
 #include <gmp.h>
+#include "longlong.h"
 #include "mpoly_types.h"
 
 #ifdef __cplusplus
diff --git a/src/mpoly/test/t-pack_unpack.c b/src/mpoly/test/t-pack_unpack.c
index a9af811b0a..ac98bd0e10 100644
--- a/src/mpoly/test/t-pack_unpack.c
+++ b/src/mpoly/test/t-pack_unpack.c
@@ -13,6 +13,11 @@
 #include "ulong_extras.h"
 #include "mpoly.h"
 
+/* Left shift that yields 0 when shift == FLINT_BITS, where a plain <<
+   by the full word width would be undefined. */
+#define l_shift(in, shift) \
+    (((shift) == FLINT_BITS) ? WORD(0) : ((in) << (shift)))
+
 TEST_FUNCTION_START(mpoly_pack_unpack, state)
 {
     slong k, i, length, nfields, bits1, bits2;
diff --git a/src/nmod.h b/src/nmod.h
index 13f61f7bc0..ecaafcf6b2 100644
--- a/src/nmod.h
+++ b/src/nmod.h
@@ -27,74 +27,75 @@ extern "C" {
 #endif
 
 #define NMOD_RED2(r, a_hi, a_lo, mod) \
-   do { \
-      ulong q0xx, q1xx, r1xx; \
-      const ulong u1xx = ((a_hi)<<(mod).norm) + r_shift((a_lo), FLINT_BITS - (mod).norm);	\
-      const ulong u0xx = ((a_lo)<<(mod).norm); \
-      const ulong nxx = ((mod).n<<(mod).norm); \
-      umul_ppmm(q1xx, q0xx, (mod).ninv, u1xx); \
-      add_ssaaaa(q1xx, q0xx, q1xx, q0xx, u1xx, u0xx); \
-      r1xx = (u0xx - (q1xx + 1)*nxx); \
-      if (r1xx > q0xx) r1xx += nxx; \
-      if (r1xx < nxx) r = (r1xx>>(mod).norm); \
-      else r = ((r1xx - nxx)>>(mod).norm); \
-   } while (0)
+  do { /* ulong casts: NMOD_RED passes a literal 0, which must not be shifted as an int */ \
+    ulong q0xx, q1xx, r1xx; \
+    const ulong u1xx = ((ulong) (a_hi) << (mod).norm) \
+     + (((mod).norm == 0) ? WORD(0) : (ulong) (a_lo) >> (FLINT_BITS - (mod).norm)); \
+    const ulong u0xx = (ulong) (a_lo) << (mod).norm; \
+    const ulong nxx = (mod).n<<(mod).norm; \
+    umul_ppmm(q1xx, q0xx, (mod).ninv, u1xx); \
+    add_ssaaaa(q1xx, q0xx, q1xx, q0xx, u1xx, u0xx); \
+    r1xx = (u0xx - (q1xx + 1)*nxx); \
+    if (r1xx > q0xx) r1xx += nxx; \
+    if (r1xx < nxx) r = (r1xx>>(mod).norm); \
+    else r = ((r1xx - nxx)>>(mod).norm); \
+  } while (0)
 
 #define NMOD_RED(r, a, mod) \
-   do { \
-      NMOD_RED2(r, 0, a, mod); \
-   } while (0)
+  do { \
+    NMOD_RED2(r, 0, a, mod); \
+  } while (0)
 
 #define NMOD2_RED2(r, a_hi, a_lo, mod) \
-    do { \
-       ulong v_hi;	\
-       NMOD_RED(v_hi, a_hi, mod); \
-       NMOD_RED2(r, v_hi, a_lo, mod); \
-    } while (0)
+  do { \
+    ulong v_hi;	\
+    NMOD_RED(v_hi, a_hi, mod); \
+    NMOD_RED2(r, v_hi, a_lo, mod); \
+  } while (0)
 
 #define NMOD_RED3(r, a_hi, a_me, a_lo, mod) \
-    do { \
-       ulong v_hi;	\
-       NMOD_RED2(v_hi, a_hi, a_me, mod); \
-       NMOD_RED2(r, v_hi, a_lo, mod); \
-    } while (0)
+  do { \
+    ulong v_hi;	\
+    NMOD_RED2(v_hi, a_hi, a_me, mod); \
+    NMOD_RED2(r, v_hi, a_lo, mod); \
+  } while (0)
 
 #define NMOD_BITS(mod) (FLINT_BITS - ((mod).norm))
 #define NMOD_CAN_USE_SHOUP(mod) ((mod).norm > 0)
 
 #define NMOD_MUL_PRENORM(res, a, b, mod) \
-    do { \
-        ulong q0xx, q1xx, rxx, p_hixx, p_loxx; \
-        ulong nxx, ninvxx; \
-        unsigned int normxx; \
-        ninvxx = (mod).ninv; \
-        normxx = (mod).norm; \
-        nxx = (mod).n << normxx; \
-        umul_ppmm(p_hixx, p_loxx, (a), (b)); \
-        umul_ppmm(q1xx, q0xx, ninvxx, p_hixx); \
-        add_ssaaaa(q1xx, q0xx, q1xx, q0xx, p_hixx, p_loxx); \
-        rxx = (p_loxx - (q1xx + 1) * nxx); \
-        if (rxx > q0xx) \
-            rxx += nxx; \
-        rxx = (rxx < nxx ? rxx : rxx - nxx) >> normxx; \
-        (res) = rxx; \
-    } while (0)
+  do { \
+    ulong q0xx, q1xx, rxx, p_hixx, p_loxx; \
+    ulong nxx, ninvxx; \
+    unsigned int normxx; \
+    ninvxx = (mod).ninv; \
+    normxx = (mod).norm; \
+    nxx = (mod).n << normxx; \
+    umul_ppmm(p_hixx, p_loxx, (a), (b)); \
+    umul_ppmm(q1xx, q0xx, ninvxx, p_hixx); \
+    add_ssaaaa(q1xx, q0xx, q1xx, q0xx, p_hixx, p_loxx); \
+    rxx = (p_loxx - (q1xx + 1) * nxx); \
+    if (rxx > q0xx) \
+      rxx += nxx; \
+    rxx = (rxx < nxx ? rxx : rxx - nxx) >> normxx; \
+    (res) = rxx; \
+  } while (0)
 
 #define NMOD_MUL_FULLWORD(res, a, b, mod) \
-    do { \
-        ulong q0xx, q1xx, rxx, p_hixx, p_loxx; \
-        ulong nxx, ninvxx; \
-        ninvxx = (mod).ninv; \
-        nxx = (mod).n; \
-        umul_ppmm(p_hixx, p_loxx, (a), (b)); \
-        umul_ppmm(q1xx, q0xx, ninvxx, p_hixx); \
-        add_ssaaaa(q1xx, q0xx, q1xx, q0xx, p_hixx, p_loxx); \
-        rxx = (p_loxx - (q1xx + 1) * nxx); \
-        if (rxx > q0xx) \
-            rxx += nxx; \
-        rxx = (rxx < nxx ? rxx : rxx - nxx); \
-        (res) = rxx; \
-    } while (0)
+  do { \
+    ulong q0xx, q1xx, rxx, p_hixx, p_loxx; \
+    ulong nxx, ninvxx; \
+    ninvxx = (mod).ninv; \
+    nxx = (mod).n; \
+    umul_ppmm(p_hixx, p_loxx, (a), (b)); \
+    umul_ppmm(q1xx, q0xx, ninvxx, p_hixx); \
+    add_ssaaaa(q1xx, q0xx, q1xx, q0xx, p_hixx, p_loxx); \
+    rxx = (p_loxx - (q1xx + 1) * nxx); \
+    if (rxx > q0xx) \
+      rxx += nxx; \
+    rxx = (rxx < nxx ? rxx : rxx - nxx); \
+    (res) = rxx; \
+  } while (0)
 
 NMOD_INLINE ulong nmod_set_ui(ulong x, nmod_t mod)
 {
diff --git a/src/nmod_mat/init.c b/src/nmod_mat/init.c
index 8f327857e8..822639c83d 100644
--- a/src/nmod_mat/init.c
+++ b/src/nmod_mat/init.c
@@ -10,6 +10,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
+#include "long_extras.h"
 #include "mpn_extras.h"
 #include "nmod_mat.h"
 
@@ -19,13 +20,24 @@ nmod_mat_init(nmod_mat_t mat, slong rows, slong cols, ulong n)
     slong i;
 
     if (rows != 0)
-        mat->rows = (ulong **) flint_malloc(rows * sizeof(ulong *));
+        mat->rows = flint_malloc(rows * sizeof(ulong *));
     else
         mat->rows = NULL;
 
+    mat->r = rows;
+    mat->c = cols;
+
     if (rows != 0 && cols != 0)
     {
-        mat->entries = (ulong *) flint_calloc(flint_mul_sizes(rows, cols), sizeof(ulong));
+        slong num;
+        int of;
+
+        /* rows * cols must fit in an slong; report failure as an overflow */
+        of = z_mul_checked(&num, rows, cols);
+        if (of)
+            flint_throw(FLINT_OVERFLOW, "Overflow creating a %wd x %wd object\n", rows, cols);
+
+        mat->entries = flint_calloc(num, sizeof(ulong));
 
         for (i = 0; i < rows; i++)
             mat->rows[i] = mat->entries + i * cols;
@@ -33,16 +45,13 @@ nmod_mat_init(nmod_mat_t mat, slong rows, slong cols, ulong n)
     else
     {
         mat->entries = NULL;
-	if (rows != 0)
+        if (rows != 0)
         {
             for (i = 0; i < rows; i++)
                 mat->rows[i] = NULL;
         }
     }
 
-    mat->r = rows;
-    mat->c = cols;
-
     nmod_mat_set_mod(mat, n);
 }
 
@@ -58,9 +67,22 @@ nmod_mat_init_set(nmod_mat_t mat, const nmod_mat_t src)
     else
         mat->rows = NULL;
 
-    if ((rows) && (cols))
+    mat->r = rows;
+    mat->c = cols;
+
+    mat->mod = src->mod;
+
+    if (rows != 0 && cols != 0)
     {
-        mat->entries = flint_malloc(flint_mul_sizes(rows, cols) * sizeof(ulong));
+        slong num;
+        int of;
+
+        /* rows * cols must fit in an slong; report failure as an overflow */
+        of = z_mul_checked(&num, rows, cols);
+        if (of)
+            flint_throw(FLINT_OVERFLOW, "Overflow creating a %wd x %wd object\n", rows, cols);
+
+        mat->entries = flint_malloc(num * sizeof(ulong));
 
         for (i = 0; i < rows; i++)
         {
@@ -71,15 +93,10 @@ nmod_mat_init_set(nmod_mat_t mat, const nmod_mat_t src)
     else
     {
         mat->entries = NULL;
-	if (rows != 0)
+        if (rows != 0)
         {
             for (i = 0; i < rows; i++)
                 mat->rows[i] = NULL;
-	}
+        }
     }
-
-    mat->r = rows;
-    mat->c = cols;
-
-    mat->mod = src->mod;
 }
diff --git a/src/nmod_mat/mul.c b/src/nmod_mat/mul.c
index e5810a2f67..245a536ad5 100644
--- a/src/nmod_mat/mul.c
+++ b/src/nmod_mat/mul.c
@@ -15,6 +15,7 @@
 #include "thread_support.h"
 
 #if FLINT_USES_BLAS
+# include "longlong.h"
 # include "cblas.h"
 #endif
 
diff --git a/src/nmod_poly/bit_pack.c b/src/nmod_poly/bit_pack.c
index c29bc9bc1a..c20cf56bb0 100644
--- a/src/nmod_poly/bit_pack.c
+++ b/src/nmod_poly/bit_pack.c
@@ -14,6 +14,8 @@
 #include "nmod_poly.h"
 #include "fmpz.h"
 
+#define r_shift(in, c) (((c) == FLINT_BITS) ? WORD(0) : ((in) >> (c)))
+
 /* Assumes length > 0, bits > 0. */
 void
 _nmod_poly_bit_pack(nn_ptr res, nn_srcptr poly, slong len, flint_bitcnt_t bits)
diff --git a/src/nmod_poly/get_str.c b/src/nmod_poly/get_str.c
index 0e166f2e29..1e390bd935 100644
--- a/src/nmod_poly/get_str.c
+++ b/src/nmod_poly/get_str.c
@@ -13,6 +13,7 @@
 #include <string.h>
 #include <math.h>
 #include "nmod_poly.h"
+#include "longlong.h"
 
 char * nmod_poly_get_str(const nmod_poly_t poly)
 {
diff --git a/src/nmod_poly/mulhigh.c b/src/nmod_poly/mulhigh.c
index a25a1d3ae0..bd42557af1 100644
--- a/src/nmod_poly/mulhigh.c
+++ b/src/nmod_poly/mulhigh.c
@@ -10,6 +10,7 @@
 */
 
 #include "nmod_poly.h"
+#include "longlong.h"
 
 void _nmod_poly_mulhigh(nn_ptr res, nn_srcptr poly1, slong len1,
                              nn_srcptr poly2, slong len2, slong n, nmod_t mod)
diff --git a/src/nmod_poly_factor/factor.c b/src/nmod_poly_factor/factor.c
index 9346fc859a..927829bae8 100644
--- a/src/nmod_poly_factor/factor.c
+++ b/src/nmod_poly_factor/factor.c
@@ -13,6 +13,7 @@
 */
 
 #include <math.h>
+#include "longlong.h"
 #include "nmod_poly.h"
 #include "nmod_poly_factor.h"
 
diff --git a/src/nmod_poly_mat/init.c b/src/nmod_poly_mat/init.c
index 518b9fda3f..334c11611f 100644
--- a/src/nmod_poly_mat/init.c
+++ b/src/nmod_poly_mat/init.c
@@ -9,7 +9,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
-#include "flint.h"
+#include "long_extras.h"
 #include "nmod_poly.h"
 #include "nmod_poly_mat.h"
 
@@ -19,13 +19,25 @@ nmod_poly_mat_init(nmod_poly_mat_t A, slong rows, slong cols, ulong n)
     slong i;
 
     if (rows > 0)
-        A->rows = (nmod_poly_struct **) flint_malloc(rows * sizeof(nmod_poly_struct *));
+        A->rows = flint_malloc(rows * sizeof(nmod_poly_struct *));
     else
         A->rows = NULL;
 
+    A->modulus = n;
+    A->r = rows;
+    A->c = cols;
+
     if (rows > 0 && cols > 0)
     {
-        A->entries = (nmod_poly_struct *) flint_malloc(flint_mul_sizes(rows, cols) * sizeof(nmod_poly_struct));
+        slong num;
+        int of;
+
+        of = z_mul_checked(&num, rows, cols);
+
+        if (of)
+            flint_throw(FLINT_ERROR, "Overflow creating a %wd x %wd object\n", rows, cols);
+
+        A->entries = flint_malloc(num * sizeof(nmod_poly_struct));
 
         for (i = 0; i < rows * cols; i++)
             nmod_poly_init(A->entries + i, n);
@@ -36,14 +48,10 @@ nmod_poly_mat_init(nmod_poly_mat_t A, slong rows, slong cols, ulong n)
     else
     {
         A->entries = NULL;
-	if (rows > 0)
+        if (rows > 0)
         {
             for (i = 0; i < rows; i++)
                 A->rows[i] = NULL;
         }
     }
-
-    A->modulus = n;
-    A->r = rows;
-    A->c = cols;
 }
diff --git a/src/nmod_poly_mat/pow.c b/src/nmod_poly_mat/pow.c
index 5a899488c7..546b2f9f55 100644
--- a/src/nmod_poly_mat/pow.c
+++ b/src/nmod_poly_mat/pow.c
@@ -9,7 +9,7 @@
     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
 */
 
-#include "flint.h"
+#include "longlong.h"
 #include "nmod_poly.h"
 #include "nmod_poly_mat.h"
 
diff --git a/src/nmod_vec/max_bits.c b/src/nmod_vec/max_bits.c
index b9fed615c0..8d60ccc312 100644
--- a/src/nmod_vec/max_bits.c
+++ b/src/nmod_vec/max_bits.c
@@ -11,6 +11,7 @@
 */
 
 #include "nmod_vec.h"
+#include "longlong.h"
 
 flint_bitcnt_t _nmod_vec_max_bits(nn_srcptr vec, slong len)
 {
diff --git a/src/test/t-flint_clz.c b/src/test/t-flint_clz.c
index 23af381085..9027caca8a 100644
--- a/src/test/t-flint_clz.c
+++ b/src/test/t-flint_clz.c
@@ -12,6 +12,8 @@
 #include "ulong_extras.h"
 #include "test_helpers.h"
 
+#define r_shift(in, c) (((c) == FLINT_BITS) ? WORD(0) : ((in) >> (c)))
+
 TEST_FUNCTION_START(flint_clz, state)
 {
     int i, result;
@@ -19,14 +21,16 @@ TEST_FUNCTION_START(flint_clz, state)
     for (i = 0; i < 100000 * flint_test_multiplier(); i++)
     {
         ulong n;
-        unsigned int count = 0;
+        unsigned int count;
 
         n = n_randtest(state);
 
-        if (n != 0)
-            count = flint_clz(n);
+        if (n == 0)
+            continue;
+
+        count = flint_clz(n);
 
-        result = ((n == UWORD(0)) || (((slong)(n << count) < WORD(0)) && (r_shift(n, FLINT_BITS-count) == UWORD(0))));
+        result = ((slong)(n << count) < WORD(0)) && (r_shift(n, FLINT_BITS-count) == UWORD(0));
         if (!result)
             TEST_FUNCTION_FAIL("n = %wu, count = %u\n", n, count);
     }
diff --git a/src/test/t-flint_ctz.c b/src/test/t-flint_ctz.c
index 472165ee34..24029e001d 100644
--- a/src/test/t-flint_ctz.c
+++ b/src/test/t-flint_ctz.c
@@ -12,6 +12,9 @@
 #include "ulong_extras.h"
 #include "test_helpers.h"
 
+#define l_shift(in, shift) \
+    (((shift) == FLINT_BITS) ? WORD(0) : ((in) << (shift))) /* full-width shift is UB, yield 0 */
+
 TEST_FUNCTION_START(flint_ctz, state)
 {
    int i, result;
@@ -19,14 +22,16 @@ TEST_FUNCTION_START(flint_ctz, state)
    for (i = 0; i < 100000 * flint_test_multiplier(); i++)
    {
       ulong n;
-      unsigned int count = 0;
+      unsigned int count;
 
       n = n_randtest(state);
 
-      if (n != 0)
-         count = flint_ctz(n);
+      if (n == 0)
+         continue;
+
+      count = flint_ctz(n);
 
-      result = ((n == UWORD(0)) || (((n >> count) & UWORD(1)) && (l_shift(n, FLINT_BITS-count) == UWORD(0))));
+      result = ((n >> count) & UWORD(1)) && (l_shift(n, FLINT_BITS-count) == UWORD(0));
       if (!result)
             TEST_FUNCTION_FAIL("n = %wu, count = %u\n", n, count);
    }
diff --git a/src/ulong_extras.h b/src/ulong_extras.h
index b3d392e443..729e83dbec 100644
--- a/src/ulong_extras.h
+++ b/src/ulong_extras.h
@@ -23,6 +23,7 @@
 #endif
 
 #include "limb_types.h"
+#include "longlong.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -116,24 +117,36 @@ ulong n_CRT(ulong r1, ulong m1, ulong r2, ulong m2);
 
 ULONG_EXTRAS_INLINE int n_mul_checked(ulong * a, ulong b, ulong c)
 {
+#if defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 5) /* overflow builtins: GCC >= 5 */
+    return __builtin_mul_overflow(b, c, a);
+#else
 	ulong ahi, alo;
 	umul_ppmm(ahi, alo, b, c);
 	*a = alo;
 	return 0 != ahi;
+#endif
 }
 
 ULONG_EXTRAS_INLINE int n_add_checked(ulong * a, ulong b, ulong c)
 {
+#if defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 5) /* overflow builtins: GCC >= 5 */
+    return __builtin_add_overflow(b, c, a);
+#else
     int of = b + c < b;
     *a = b + c;
     return of;
+#endif
 }
 
 ULONG_EXTRAS_INLINE int n_sub_checked(ulong * a, ulong b, ulong c)
 {
+#if defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 5) /* overflow builtins: GCC >= 5 */
+    return __builtin_sub_overflow(b, c, a);
+#else
     int of = b < c;
     *a = b - c;
     return of;
+#endif
 }
 
 /* Modular arithmetic ********************************************************/
diff --git a/src/ulong_extras/div2_preinv.c b/src/ulong_extras/div2_preinv.c
index e0206b35a2..aa749d834f 100644
--- a/src/ulong_extras/div2_preinv.c
+++ b/src/ulong_extras/div2_preinv.c
@@ -18,6 +18,8 @@
    https://gmplib.org/~tege/division-paper.pdf
 */
 
+#define r_shift(in, c) (((c) == FLINT_BITS) ? WORD(0) : ((in) >> (c)))
+
 ulong
 n_div2_preinv(ulong a, ulong n, ulong ninv)
 {
diff --git a/src/ulong_extras/divrem2_preinv.c b/src/ulong_extras/divrem2_preinv.c
index 23e4a0ca24..248c7c0ec6 100644
--- a/src/ulong_extras/divrem2_preinv.c
+++ b/src/ulong_extras/divrem2_preinv.c
@@ -18,6 +18,8 @@
    https://gmplib.org/~tege/division-paper.pdf
 */
 
+#define r_shift(in, c) (((c) == FLINT_BITS) ? WORD(0) : ((in) >> (c)))
+
 ulong
 n_divrem2_preinv(ulong * q, ulong a, ulong n, ulong ninv)
 {
diff --git a/src/ulong_extras/factor_SQUFOF.c b/src/ulong_extras/factor_SQUFOF.c
index 69cf67718a..c847f5ab0b 100644
--- a/src/ulong_extras/factor_SQUFOF.c
+++ b/src/ulong_extras/factor_SQUFOF.c
@@ -12,6 +12,8 @@
 #include <gmp.h>
 #include "ulong_extras.h"
 
+#define r_shift(in, c) (((c) == FLINT_BITS) ? WORD(0) : ((in) >> (c)))
+
 ulong _ll_factor_SQUFOF(ulong n_hi, ulong n_lo, ulong max_iters)
 {
     ulong n[2];
diff --git a/src/ulong_extras/ll_mod_preinv.c b/src/ulong_extras/ll_mod_preinv.c
index b1616b5d62..b903552af7 100644
--- a/src/ulong_extras/ll_mod_preinv.c
+++ b/src/ulong_extras/ll_mod_preinv.c
@@ -18,6 +18,8 @@
    https://gmplib.org/~tege/division-paper.pdf
 */
 
+#define r_shift(in, c) (((c) == FLINT_BITS) ? WORD(0) : ((in) >> (c)))
+
 ulong
 n_ll_mod_preinv(ulong a_hi, ulong a_lo, ulong n, ulong ninv)
 {
diff --git a/src/ulong_extras/lll_mod_preinv.c b/src/ulong_extras/lll_mod_preinv.c
index e8859ea479..bd69204990 100644
--- a/src/ulong_extras/lll_mod_preinv.c
+++ b/src/ulong_extras/lll_mod_preinv.c
@@ -18,6 +18,8 @@
    https://gmplib.org/~tege/division-paper.pdf
 */
 
+#define r_shift(in, c) (((c) == FLINT_BITS) ? WORD(0) : ((in) >> (c)))
+
 ulong
 n_lll_mod_preinv(ulong a_hi, ulong a_mi, ulong a_lo, ulong n, ulong ninv)
 {
diff --git a/src/ulong_extras/mod2_preinv.c b/src/ulong_extras/mod2_preinv.c
index ae96fec452..21f24f3bbd 100644
--- a/src/ulong_extras/mod2_preinv.c
+++ b/src/ulong_extras/mod2_preinv.c
@@ -18,6 +18,8 @@
    https://gmplib.org/~tege/division-paper.pdf
 */
 
+#define r_shift(in, c) (((c) == FLINT_BITS) ? WORD(0) : ((in) >> (c)))
+
 ulong
 n_mod2_preinv(ulong a, ulong n, ulong ninv)
 {
diff --git a/src/ulong_extras/randomisation.c b/src/ulong_extras/randomisation.c
index f9a5bc1ea5..c016afe4ee 100644
--- a/src/ulong_extras/randomisation.c
+++ b/src/ulong_extras/randomisation.c
@@ -16,10 +16,15 @@
 #include "ulong_extras.h"
 #include "fmpz.h"
 
+#define l_shift(in, shift) \
+    (((shift) == FLINT_BITS) ? WORD(0) : ((in) << (shift))) /* full-width shift is UB, yield 0 */
+
 ulong n_randbits(flint_rand_t state, unsigned int bits)
 {
-   if (bits == 0) return UWORD(0);
-   else return (UWORD(1) << (bits - 1)) | n_randint(state, l_shift(UWORD(1), bits));
+   if (bits == 0)
+       return UWORD(0);
+   else
+       return (UWORD(1) << (bits - 1)) | n_randint(state, l_shift(UWORD(1), bits));
 }
 
 ulong n_urandint(flint_rand_t state, ulong limit)