diff --git a/doc/source/nmod_poly.rst b/doc/source/nmod_poly.rst
index d528e90394..e802fdc3a3 100644
--- a/doc/source/nmod_poly.rst
+++ b/doc/source/nmod_poly.rst
@@ -1032,6 +1032,12 @@ Division
 
     Computes the remainder `R` on polynomial division of `A` by `B`.
 
+.. function:: void _nmod_poly_divexact(mp_ptr Q, mp_srcptr A, slong lenA, mp_srcptr B, slong lenB, nmod_t mod)
+              void nmod_poly_divexact(nmod_poly_t Q, const nmod_poly_t A, const nmod_poly_t B)
+
+    Computes the quotient `Q` of `A` and `B` assuming that the division
+    is exact.
+
 .. function:: void _nmod_poly_inv_series_basecase(mp_ptr Qinv, mp_srcptr Q, slong Qlen, slong n, nmod_t mod)
 
     Given ``Q`` of length ``Qlen`` whose leading coefficient is invertible
diff --git a/src/gr/nmod.c b/src/gr/nmod.c
index aac855e7cc..323ebbda31 100644
--- a/src/gr/nmod.c
+++ b/src/gr/nmod.c
@@ -1002,6 +1002,19 @@ _gr_nmod_poly_divrem(mp_ptr Q, mp_ptr R, mp_srcptr A, slong lenA,
     }
 }
 
+int
+_gr_nmod_poly_divexact(mp_ptr Q, mp_srcptr A, slong lenA, mp_srcptr B, slong lenB, gr_ctx_t ctx)
+{
+    slong lenQ = lenA - lenB + 1;
+
+    if (lenB <= 40 || lenQ <= 20)
+        return _gr_poly_divexact_basecase(Q, A, lenA, B, lenB, ctx);
+    else if (lenB <= 60 || lenQ <= 60)
+        return _gr_poly_divexact_basecase_bidirectional(Q, A, lenA, B, lenB, ctx);
+    else
+        return _gr_poly_divexact_bidirectional(Q, A, lenA, B, lenB, ctx);
+}
+
 /* basecase -> Newton cutoffs */
 /* todo: better unbalanced tuning */
 
@@ -1434,6 +1447,7 @@ gr_method_tab_input __gr_nmod_methods_input[] =
     {GR_METHOD_VEC_RECIPROCALS, (gr_funcptr) _gr_nmod_vec_reciprocals},
     {GR_METHOD_POLY_MULLOW,     (gr_funcptr) _gr_nmod_poly_mullow},
     {GR_METHOD_POLY_DIVREM,     (gr_funcptr) _gr_nmod_poly_divrem},
+    {GR_METHOD_POLY_DIVEXACT,   (gr_funcptr) _gr_nmod_poly_divexact},
     {GR_METHOD_POLY_INV_SERIES, (gr_funcptr) _gr_nmod_poly_inv_series},
     {GR_METHOD_POLY_INV_SERIES_BASECASE, (gr_funcptr) _gr_nmod_poly_inv_series_basecase},
     {GR_METHOD_POLY_DIV_SERIES, (gr_funcptr) _gr_nmod_poly_div_series},
diff --git a/src/gr_mat/minpoly_field.c b/src/gr_mat/minpoly_field.c
index d67e4d0deb..e15b56edc1 100644
--- a/src/gr_mat/minpoly_field.c
+++ b/src/gr_mat/minpoly_field.c
@@ -165,6 +165,8 @@ gr_mat_minpoly_field(gr_poly_t p, const gr_mat_t X, gr_ctx_t ctx)
         }
         _gr_poly_set_length(b, r1 + 1, ctx);
 
+        /* todo: poly_divexact */
+        /* todo: compute as (p * b) / g or (p / g) * b or p * (g / b) ? */
         status |= gr_poly_gcd(g, p, b, ctx);
         status |= gr_poly_mul(p, p, b, ctx);
         status |= gr_poly_divrem(p, r, p, g, ctx);
diff --git a/src/gr_poly/divexact_basecase.c b/src/gr_poly/divexact_basecase.c
index 9c154869b0..d6994c9c10 100644
--- a/src/gr_poly/divexact_basecase.c
+++ b/src/gr_poly/divexact_basecase.c
@@ -48,6 +48,11 @@ _gr_poly_divexact_basecase(gr_ptr Q,
     slong sz = ctx->sizeof_elem;
     gr_ptr invB;
 
+    /* note: if we wanted to be clever, we could pick a pair of coefficients such
+             that the division is easy */
+    if (Alen == Blen)
+        return gr_divexact(Q, GR_ENTRY(A, Alen - 1, sz), GR_ENTRY(B, Blen - 1, sz), ctx);
+
     GR_TMP_INIT(invB, ctx);
 
     /* todo: we sometimes want to keep dividing, e.g. over RR with small coefficient */
diff --git a/src/n_poly.h b/src/n_poly.h
index 3a76cff8b4..84848de029 100644
--- a/src/n_poly.h
+++ b/src/n_poly.h
@@ -410,6 +410,22 @@ void _n_poly_mod_div(n_poly_t Q, const n_poly_t A, const n_poly_t B, nmod_t mod)
     Q->length = lenA - lenB + 1;
 }
 
+FLINT_FORCE_INLINE
+void _n_poly_mod_divexact(n_poly_t Q, const n_poly_t A, const n_poly_t B, nmod_t mod)
+{
+    const slong lenA = A->length, lenB = B->length;
+    FLINT_ASSERT(lenB > 0);
+    FLINT_ASSERT(Q != A && Q != B);
+    if (lenA < lenB)
+    {
+        n_poly_zero(Q);
+        return;
+    }
+    n_poly_fit_length(Q, lenA - lenB + 1);
+    _nmod_poly_divexact(Q->coeffs, A->coeffs, lenA, B->coeffs, lenB, mod);
+    Q->length = lenA - lenB + 1;
+}
+
 FLINT_FORCE_INLINE
 void _n_poly_mod_rem(n_poly_t R, const n_poly_t A, const n_poly_t B, nmod_t mod)
 {
@@ -472,6 +488,9 @@ void n_poly_mod_rem(n_poly_t R, const n_poly_t A, const n_poly_t B,
 void n_poly_mod_divrem(n_poly_t Q, n_poly_t R, const n_poly_t A,
                                                  const n_poly_t B, nmod_t mod);
 
+void n_poly_mod_divexact(n_poly_t Q, const n_poly_t A, const n_poly_t B,
+                                                                   nmod_t mod);
+
 void n_poly_mod_mulmod(n_poly_t res, const n_poly_t poly1,
                            const n_poly_t poly2, const n_poly_t f, nmod_t mod);
 
diff --git a/src/n_poly/n_bpoly_mod.c b/src/n_poly/n_bpoly_mod.c
index ff33616164..19b4e48e8a 100644
--- a/src/n_poly/n_bpoly_mod.c
+++ b/src/n_poly/n_bpoly_mod.c
@@ -81,7 +81,7 @@ void n_bpoly_mod_divexact_last(n_bpoly_t A, const n_poly_t b, nmod_t ctx)
         if (A->coeffs[i].length < 1)
             continue;
 
-        n_poly_mod_div(t, A->coeffs + i, b, ctx);
+        n_poly_mod_divexact(t, A->coeffs + i, b, ctx);
         n_poly_swap(A->coeffs + i, t);
     }
 }
diff --git a/src/n_poly/n_bpoly_mod_gcd.c b/src/n_poly/n_bpoly_mod_gcd.c
index f261e48d87..af27cefa15 100644
--- a/src/n_poly/n_bpoly_mod_gcd.c
+++ b/src/n_poly/n_bpoly_mod_gcd.c
@@ -225,8 +225,8 @@ int n_bpoly_mod_gcd_brown_smprime(
 
     n_poly_mod_gcd(cG, cA, cB, ctx);
 
-    n_poly_mod_div(cAbar, cA, cG, ctx);
-    n_poly_mod_div(cBbar, cB, cG, ctx);
+    n_poly_mod_divexact(cAbar, cA, cG, ctx);
+    n_poly_mod_divexact(cBbar, cB, cG, ctx);
 
     n_poly_mod_gcd(gamma, A->coeffs + A->length - 1, B->coeffs + B->length - 1, ctx);
 
@@ -311,11 +311,11 @@ int n_bpoly_mod_gcd_brown_smprime(
     else
     {
         n_poly_mod_gcd(Gevalp, Aevalp, Bevalp, ctx);
-        n_poly_mod_div(Abarevalp, Aevalp, Gevalp, ctx);
-        n_poly_mod_div(Bbarevalp, Bevalp, Gevalp, ctx);
+        n_poly_mod_divexact(Abarevalp, Aevalp, Gevalp, ctx);
+        n_poly_mod_divexact(Bbarevalp, Bevalp, Gevalp, ctx);
         n_poly_mod_gcd(Gevalm, Aevalm, Bevalm, ctx);
-        n_poly_mod_div(Abarevalm, Aevalm, Gevalm, ctx);
-        n_poly_mod_div(Bbarevalm, Bevalm, Gevalm, ctx);
+        n_poly_mod_divexact(Abarevalm, Aevalm, Gevalm, ctx);
+        n_poly_mod_divexact(Bbarevalm, Bevalm, Gevalm, ctx);
         gstab = astab = bstab = 0;
     }
 
diff --git a/src/n_poly/n_poly_mod.c b/src/n_poly/n_poly_mod.c
index 5cb761ce3b..ef1aa2483b 100644
--- a/src/n_poly/n_poly_mod.c
+++ b/src/n_poly/n_poly_mod.c
@@ -339,6 +339,57 @@ void n_poly_mod_div(n_poly_t Q, const n_poly_t A, const n_poly_t B, nmod_t ctx)
     Q->length = A_len - B_len + 1;
 }
 
+void n_poly_mod_divexact(n_poly_t Q, const n_poly_t A, const n_poly_t B, nmod_t ctx)
+{
+    n_poly_t tQ;
+    mp_ptr q;
+    slong A_len, B_len;
+
+    B_len = B->length;
+
+    if (B_len == 0)
+    {
+        if (ctx.n == 1)
+        {
+            n_poly_set(Q, A);
+            return;
+        }
+        else
+        {
+            flint_throw(FLINT_ERROR, "Exception (n_poly_mod_divexact). Division by zero.\n");
+        }
+    }
+
+    A_len = A->length;
+
+    if (A_len < B_len)
+    {
+        n_poly_zero(Q);
+        return;
+    }
+
+    if (Q == A || Q == B)
+    {
+        n_poly_init2(tQ, A_len - B_len + 1);
+        q = tQ->coeffs;
+    }
+    else
+    {
+        n_poly_fit_length(Q, A_len - B_len + 1);
+        q = Q->coeffs;
+    }
+
+    _nmod_poly_divexact(q, A->coeffs, A_len, B->coeffs, B_len, ctx);
+
+    if (Q == A || Q == B)
+    {
+        n_poly_swap(tQ, Q);
+        n_poly_clear(tQ);
+    }
+
+    Q->length = A_len - B_len + 1;
+}
+
 void n_poly_mod_rem(n_poly_t R, const n_poly_t A, const n_poly_t B, nmod_t ctx)
 {
     const slong lenA = A->length, lenB = B->length;
diff --git a/src/n_poly/n_polyu1n_gcd.c b/src/n_poly/n_polyu1n_gcd.c
index acde94e4bd..adfaa13a68 100644
--- a/src/n_poly/n_polyu1n_gcd.c
+++ b/src/n_poly/n_polyu1n_gcd.c
@@ -298,8 +298,8 @@ int n_polyu1n_mod_gcd_brown_smprime(
     _n_poly_vec_mod_remove_content(cB, B->coeffs, B->length, ctx);
 
     n_poly_mod_gcd(cG, cA, cB, ctx);
-    n_poly_mod_div(cAbar, cA, cG, ctx);
-    n_poly_mod_div(cBbar, cB, cG, ctx);
+    n_poly_mod_divexact(cAbar, cA, cG, ctx);
+    n_poly_mod_divexact(cBbar, cB, cG, ctx);
 
     n_poly_mod_gcd(gamma, A->coeffs + 0, B->coeffs + 0, ctx);
 
@@ -377,11 +377,11 @@ int n_polyu1n_mod_gcd_brown_smprime(
     else
     {
         n_poly_mod_gcd(Gevalp, Aevalp, Bevalp, ctx);
-        n_poly_mod_div(Abarevalp, Aevalp, Gevalp, ctx);
-        n_poly_mod_div(Bbarevalp, Bevalp, Gevalp, ctx);
+        n_poly_mod_divexact(Abarevalp, Aevalp, Gevalp, ctx);
+        n_poly_mod_divexact(Bbarevalp, Bevalp, Gevalp, ctx);
         n_poly_mod_gcd(Gevalm, Aevalm, Bevalm, ctx);
-        n_poly_mod_div(Abarevalm, Aevalm, Gevalm, ctx);
-        n_poly_mod_div(Bbarevalm, Bevalm, Gevalm, ctx);
+        n_poly_mod_divexact(Abarevalm, Aevalm, Gevalm, ctx);
+        n_poly_mod_divexact(Bbarevalm, Bevalm, Gevalm, ctx);
         gstab = astab = bstab = 0;
     }
 
diff --git a/src/nmod_mat/minpoly.c b/src/nmod_mat/minpoly.c
index 19957b8e57..919ded83c2 100644
--- a/src/nmod_mat/minpoly.c
+++ b/src/nmod_mat/minpoly.c
@@ -164,8 +164,9 @@ void nmod_mat_minpoly_with_gens(nmod_poly_t p, const nmod_mat_t X, ulong * P)
       _nmod_poly_set_length(b, r1 + 1);
 
       nmod_poly_gcd(g, p, b);
+      /* todo: compute as (p * b) / g or (p / g) * b or p * (g / b) ? */
       nmod_poly_mul(p, p, b);
-      nmod_poly_div(p, p, g);
+      nmod_poly_divexact(p, p, g);
 
       if (first_poly && r2 < n)
       {
diff --git a/src/nmod_mpoly/gcd.c b/src/nmod_mpoly/gcd.c
index 1745951998..ef9c832b26 100644
--- a/src/nmod_mpoly/gcd.c
+++ b/src/nmod_mpoly/gcd.c
@@ -960,14 +960,14 @@ int _do_univar(
                                                  I->Gmin_exp, I->Gstride, ctx);
     if (Abar != NULL)
     {
-        nmod_poly_div(t, a, g);
+        nmod_poly_divexact(t, a, g);
         _nmod_mpoly_from_nmod_poly_inflate(Abar, I->Abarbits, t, v_in_both,
                                               I->Abarmin_exp, I->Gstride, ctx);
     }
 
     if (Bbar != NULL)
     {
-        nmod_poly_div(t, b, g);
+        nmod_poly_divexact(t, b, g);
         _nmod_mpoly_from_nmod_poly_inflate(Bbar, I->Bbarbits, t, v_in_both,
                                               I->Bbarmin_exp, I->Gstride, ctx);
     }
diff --git a/src/nmod_mpoly/gcd_brown.c b/src/nmod_mpoly/gcd_brown.c
index a4b5a3c90f..622e738f79 100644
--- a/src/nmod_mpoly/gcd_brown.c
+++ b/src/nmod_mpoly/gcd_brown.c
@@ -155,11 +155,11 @@ static void _splitworker_bivar(void * varg)
         else
         {
             n_poly_mod_gcd(Gevalp, Aevalp, Bevalp, ctx->mod);
-            n_poly_mod_div(Abarevalp, Aevalp, Gevalp, ctx->mod);
-            n_poly_mod_div(Bbarevalp, Bevalp, Gevalp, ctx->mod);
+            n_poly_mod_divexact(Abarevalp, Aevalp, Gevalp, ctx->mod);
+            n_poly_mod_divexact(Bbarevalp, Bevalp, Gevalp, ctx->mod);
             n_poly_mod_gcd(Gevalm, Aevalm, Bevalm, ctx->mod);
-            n_poly_mod_div(Abarevalm, Aevalm, Gevalm, ctx->mod);
-            n_poly_mod_div(Bbarevalm, Bevalm, Gevalm, ctx->mod);
+            n_poly_mod_divexact(Abarevalm, Aevalm, Gevalm, ctx->mod);
+            n_poly_mod_divexact(Bbarevalm, Bevalm, Gevalm, ctx->mod);
         }
 
         FLINT_ASSERT(Gevalp->length > 0);
@@ -874,8 +874,8 @@ int nmod_mpolyn_gcd_brown_smprime_threaded_pool(
 
     n_poly_init(cAbar);
     n_poly_init(cBbar);
-    n_poly_mod_div(cAbar, cA, cG, ctx->mod);
-    n_poly_mod_div(cBbar, cB, cG, ctx->mod);
+    n_poly_mod_divexact(cAbar, cA, cG, ctx->mod);
+    n_poly_mod_divexact(cBbar, cB, cG, ctx->mod);
 
     n_poly_init(gamma);
     n_poly_mod_gcd(gamma, nmod_mpolyn_leadcoeff_poly(A, ctx),
diff --git a/src/nmod_mpoly/mpolyn_divides_threaded.c b/src/nmod_mpoly/mpolyn_divides_threaded.c
index 76cb554464..5cc58a076d 100644
--- a/src/nmod_mpoly/mpolyn_divides_threaded.c
+++ b/src/nmod_mpoly/mpolyn_divides_threaded.c
@@ -1817,7 +1817,7 @@ int nmod_mpolyn_divides_threaded_pool(
     qexps = (ulong *) TMP_ALLOC(N*sizeof(ulong));
 
     mpoly_monomial_sub(qexps + N*0, A->exps + N*0, B->exps + N*0, N);
-    n_poly_mod_div(qcoeff, A->coeffs + 0, B->coeffs + 0, ctx->mod); /* already checked */
+    n_poly_mod_divexact(qcoeff, A->coeffs + 0, B->coeffs + 0, ctx->mod); /* already checked */
 
     nmod_mpolyn_ts_init(H->polyQ, qcoeff, qexps, 1, H->bits, H->N, ctx);
 
diff --git a/src/nmod_mpoly/mpolyn_gcd_brown.c b/src/nmod_mpoly/mpolyn_gcd_brown.c
index 2d2320dd84..f67cf14578 100644
--- a/src/nmod_mpoly/mpolyn_gcd_brown.c
+++ b/src/nmod_mpoly/mpolyn_gcd_brown.c
@@ -95,8 +95,8 @@ int nmod_mpolyn_gcd_brown_smprime_bivar(
 
     n_poly_mod_gcd(cG, cA, cB, ctx->mod);
 
-    n_poly_mod_div(cAbar, cA, cG, ctx->mod);
-    n_poly_mod_div(cBbar, cB, cG, ctx->mod);
+    n_poly_mod_divexact(cAbar, cA, cG, ctx->mod);
+    n_poly_mod_divexact(cBbar, cB, cG, ctx->mod);
 
     n_poly_mod_gcd(gamma, nmod_mpolyn_leadcoeff_poly(A, ctx),
                           nmod_mpolyn_leadcoeff_poly(B, ctx), ctx->mod);
@@ -186,11 +186,11 @@ int nmod_mpolyn_gcd_brown_smprime_bivar(
     else
     {
         n_poly_mod_gcd(Gevalp, Aevalp, Bevalp, ctx->mod);
-        n_poly_mod_div(Abarevalp, Aevalp, Gevalp, ctx->mod);
-        n_poly_mod_div(Bbarevalp, Bevalp, Gevalp, ctx->mod);
+        n_poly_mod_divexact(Abarevalp, Aevalp, Gevalp, ctx->mod);
+        n_poly_mod_divexact(Bbarevalp, Bevalp, Gevalp, ctx->mod);
         n_poly_mod_gcd(Gevalm, Aevalm, Bevalm, ctx->mod);
-        n_poly_mod_div(Abarevalm, Aevalm, Gevalm, ctx->mod);
-        n_poly_mod_div(Bbarevalm, Bevalm, Gevalm, ctx->mod);
+        n_poly_mod_divexact(Abarevalm, Aevalm, Gevalm, ctx->mod);
+        n_poly_mod_divexact(Bbarevalm, Bevalm, Gevalm, ctx->mod);
         gstab = astab = bstab = 0;
     }
 
@@ -412,8 +412,8 @@ int nmod_mpolyn_gcd_brown_smprime(
 
     n_poly_mod_gcd(cG, cA, cB, ctx->mod);
 
-    n_poly_mod_div(cAbar, cA, cG, ctx->mod);
-    n_poly_mod_div(cBbar, cB, cG, ctx->mod);
+    n_poly_mod_divexact(cAbar, cA, cG, ctx->mod);
+    n_poly_mod_divexact(cBbar, cB, cG, ctx->mod);
 
     n_poly_mod_gcd(gamma, nmod_mpolyn_leadcoeff_poly(A, ctx),
                           nmod_mpolyn_leadcoeff_poly(B, ctx), ctx->mod);
@@ -739,8 +739,8 @@ int nmod_mpolyn_gcd_brown_lgprime_bivar(
 
     n_poly_init(cAbar);
     n_poly_init(cBbar);
-    n_poly_mod_div(cAbar, cA, cG, ctx->mod);
-    n_poly_mod_div(cBbar, cB, cG, ctx->mod);
+    n_poly_mod_divexact(cAbar, cA, cG, ctx->mod);
+    n_poly_mod_divexact(cBbar, cB, cG, ctx->mod);
 
     n_poly_init(gamma);
     n_poly_mod_gcd(gamma, nmod_mpolyn_leadcoeff_poly(A, ctx),
@@ -983,8 +983,8 @@ int nmod_mpolyn_gcd_brown_lgprime(
 
     n_poly_init(cAbar);
     n_poly_init(cBbar);
-    n_poly_mod_div(cAbar, cA, cG, ctx->mod);
-    n_poly_mod_div(cBbar, cB, cG, ctx->mod);
+    n_poly_mod_divexact(cAbar, cA, cG, ctx->mod);
+    n_poly_mod_divexact(cBbar, cB, cG, ctx->mod);
 
     n_poly_init(gamma);
     n_poly_mod_gcd(gamma, nmod_mpolyn_leadcoeff_poly(A, ctx),
diff --git a/src/nmod_mpoly/mpolyu_gcdp_zippel.c b/src/nmod_mpoly/mpolyu_gcdp_zippel.c
index 4099dbbb48..35aaa91920 100644
--- a/src/nmod_mpoly/mpolyu_gcdp_zippel.c
+++ b/src/nmod_mpoly/mpolyu_gcdp_zippel.c
@@ -628,9 +628,9 @@ static int nmod_mpolyu_gcdp_zippel_univar(
     nmod_mpolyu_cvtto_poly(b, B, ctx);
     nmod_poly_gcd(g, a, b);
     nmod_mpolyu_cvtfrom_poly(G, g, ctx);
-    nmod_poly_div(t, a, g);
+    nmod_poly_divexact(t, a, g);
     nmod_mpolyu_cvtfrom_poly(Abar, t, ctx);
-    nmod_poly_div(t, b, g);
+    nmod_poly_divexact(t, b, g);
     nmod_mpolyu_cvtfrom_poly(Bbar, t, ctx);
     nmod_poly_clear(a);
     nmod_poly_clear(b);
diff --git a/src/nmod_mpoly_factor/lcc_wang.c b/src/nmod_mpoly_factor/lcc_wang.c
index 429655afb2..ffdb84b64f 100644
--- a/src/nmod_mpoly_factor/lcc_wang.c
+++ b/src/nmod_mpoly_factor/lcc_wang.c
@@ -88,7 +88,7 @@ int nmod_mpoly_factor_lcc_wang(
             while (n_poly_degree(R) > 0)
             {
                 n_poly_mod_gcd(R, R, Q, ctx->mod);
-                n_poly_mod_div(Q, Q, R, ctx->mod);
+                n_poly_mod_divexact(Q, Q, R, ctx->mod);
                 if (n_poly_degree(Q) < 1)
                 {
                     success = 0;
diff --git a/src/nmod_mpoly_factor/n_bpoly_mod_factor_smprime.c b/src/nmod_mpoly_factor/n_bpoly_mod_factor_smprime.c
index 43e52cd973..3b02a10afa 100644
--- a/src/nmod_mpoly_factor/n_bpoly_mod_factor_smprime.c
+++ b/src/nmod_mpoly_factor/n_bpoly_mod_factor_smprime.c
@@ -437,7 +437,7 @@ static void _n_bpoly_mod_lift_build_steps(n_bpoly_mod_lift_t L, nmod_t ctx)
     for (k = 0; k < r; k++)
     {
         /* s[k] = (prod_{i!=k} B[i].coeffs[0])^-1 (mod B[k].coeffs[0]) */
-        n_poly_mod_div(t, A->coeffs + 0, B[k].coeffs + 0, ctx);
+        n_poly_mod_divexact(t, A->coeffs + 0, B[k].coeffs + 0, ctx);
         if (!n_poly_mod_invmod(s + k, t, B[k].coeffs + 0, ctx))
             flint_throw(FLINT_IMPINV, "n_bpoly_mod_lift: bad inverse");
 
diff --git a/src/nmod_mpoly_factor/n_bpoly_mod_hlift.c b/src/nmod_mpoly_factor/n_bpoly_mod_hlift.c
index b4fc4da71a..31d5846204 100644
--- a/src/nmod_mpoly_factor/n_bpoly_mod_hlift.c
+++ b/src/nmod_mpoly_factor/n_bpoly_mod_hlift.c
@@ -140,7 +140,7 @@ int n_bpoly_mod_hlift2_cubic(
         n_poly_mod_rem(u, t, B0->coeffs + 0, ctx);
         n_poly_mod_mul(t, u, B1->coeffs + 0, ctx);
         n_poly_mod_sub(c, c, t, ctx);
-        n_poly_mod_div(v, c, B0->coeffs + 0, ctx);
+        n_poly_mod_divexact(v, c, B0->coeffs + 0, ctx);
 
         if (!n_poly_is_zero(u))
         {
@@ -275,7 +275,7 @@ int n_bpoly_mod_hlift2(
         n_poly_mod_rem(u, t, B0->coeffs + 0, ctx);
         n_poly_mod_mul(t, u, B1->coeffs + 0, ctx);
         n_poly_mod_sub(c, c, t, ctx);
-        n_poly_mod_div(v, c, B0->coeffs + 0, ctx);
+        n_poly_mod_divexact(v, c, B0->coeffs + 0, ctx);
 
         if (j < B0->length)
             n_poly_mod_add(B0->coeffs + j, B0->coeffs + j, u, ctx);
@@ -430,7 +430,7 @@ int n_bpoly_mod_hlift_cubic(
     for (k = 0; k < r; k++)
     {
         /* s[k] = (prod_{i!=k} B[i].coeffs[0])^-1 (mod B[k].coeffs[0]) */
-        n_poly_mod_div(t, A->coeffs + 0, B[k].coeffs + 0, ctx);
+        n_poly_mod_divexact(t, A->coeffs + 0, B[k].coeffs + 0, ctx);
         if (!n_poly_mod_invmod(s[k], t, B[k].coeffs + 0, ctx))
         {
             success = -1;
diff --git a/src/nmod_mpoly_factor/n_bpoly_mod_pfrac.c b/src/nmod_mpoly_factor/n_bpoly_mod_pfrac.c
index 63ea102fb0..39c488e757 100644
--- a/src/nmod_mpoly_factor/n_bpoly_mod_pfrac.c
+++ b/src/nmod_mpoly_factor/n_bpoly_mod_pfrac.c
@@ -116,7 +116,7 @@ int n_bpoly_mod_pfrac2(
     _n_poly_mod_rem(C1evalp, t2, B1evalp, mod);
     _n_poly_mod_mul(t2, B2evalp, C1evalp, mod);
     n_poly_mod_sub(Aevalp, Aevalp, t2, mod);
-    _n_poly_mod_div(C2evalp, Aevalp, B1evalp, mod);
+    _n_poly_mod_divexact(C2evalp, Aevalp, B1evalp, mod);
 
     if (!n_poly_mod_invmod(t1, B2evalm, B1evalm, mod))
         goto bad_prime;
@@ -124,7 +124,7 @@ int n_bpoly_mod_pfrac2(
     _n_poly_mod_rem(C1evalm, t2, B1evalm, mod);
     _n_poly_mod_mul(t2, B2evalm, C1evalm, mod);
     n_poly_mod_sub(Aevalm, Aevalm, t2, mod);
-    _n_poly_mod_div(C2evalm, Aevalm, B1evalm, mod);
+    _n_poly_mod_divexact(C2evalm, Aevalm, B1evalm, mod);
 
     /* update interpolants */
     if (n_poly_degree(modulus) > 0)
diff --git a/src/nmod_poly.h b/src/nmod_poly.h
index 13668faa23..dac1d99b70 100644
--- a/src/nmod_poly.h
+++ b/src/nmod_poly.h
@@ -603,6 +603,13 @@ void _nmod_poly_rem(mp_ptr R, mp_srcptr A, slong lenA,
 void nmod_poly_rem(nmod_poly_t R,
                                      const nmod_poly_t A, const nmod_poly_t B);
 
+void _nmod_poly_divexact(mp_ptr Q, mp_srcptr A, slong lenA,
+                                          mp_srcptr B, slong lenB, nmod_t mod);
+
+void nmod_poly_divexact(nmod_poly_t Q,
+                                     const nmod_poly_t A, const nmod_poly_t B);
+
+
 void _nmod_poly_inv_series_basecase(mp_ptr Qinv,
                                  mp_srcptr Q, slong Qlen, slong n, nmod_t mod);
 
diff --git a/src/nmod_poly/divexact.c b/src/nmod_poly/divexact.c
new file mode 100644
index 0000000000..55cdf1e6af
--- /dev/null
+++ b/src/nmod_poly/divexact.c
@@ -0,0 +1,89 @@
+/*
+    Copyright (C) 2010 Sebastian Pancratz
+    Copyright (C) 2011 William Hart
+    Copyright (C) 2023, 2024 Fredrik Johansson
+
+    This file is part of FLINT.
+
+    FLINT is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 2.1 of the License, or
+    (at your option) any later version.  See <https://www.gnu.org/licenses/>.
+*/
+
+#include "nmod.h"
+#include "nmod_vec.h"
+#include "nmod_poly.h"
+#include "gr_poly.h"
+
+void
+_nmod_poly_divexact(mp_ptr Q, mp_srcptr A, slong lenA,  mp_srcptr B, slong lenB, nmod_t mod)
+{
+    if (lenA == lenB)
+    {
+        Q[0] = nmod_div(A[lenA - 1], B[lenB - 1], mod);
+    }
+    else if (lenB == 1)
+    {
+        _nmod_vec_scalar_mul_nmod(Q, A, lenA, nmod_inv(B[0], mod), mod);
+    }
+    else
+    {
+        gr_ctx_t ctx;
+        _gr_ctx_init_nmod(ctx, &mod);
+        GR_MUST_SUCCEED(_gr_poly_divexact(Q, A, lenA, B, lenB, ctx));
+    }
+}
+
+void
+nmod_poly_divexact(nmod_poly_t Q,
+                 const nmod_poly_t A, const nmod_poly_t B)
+{
+    nmod_poly_t tQ;
+    mp_ptr q;
+    slong A_len, B_len;
+
+    B_len = B->length;
+
+    if (B_len == 0)
+    {
+        if (nmod_poly_modulus(B) == 1)
+        {
+            nmod_poly_set(Q, A);
+            return;
+        }
+        else
+        {
+            flint_throw(FLINT_ERROR, "Exception (nmod_poly_divexact). Division by zero.\n");
+        }
+    }
+
+    A_len = A->length;
+
+    if (A_len < B_len)
+    {
+        nmod_poly_zero(Q);
+        return;
+    }
+
+    if (Q == A || Q == B)
+    {
+        nmod_poly_init2(tQ, A->mod.n, A_len - B_len + 1);
+        q = tQ->coeffs;
+    }
+    else
+    {
+        nmod_poly_fit_length(Q, A_len - B_len + 1);
+        q = Q->coeffs;
+    }
+
+    _nmod_poly_divexact(q, A->coeffs, A_len, B->coeffs, B_len, A->mod);
+
+    if (Q == A || Q == B)
+    {
+        nmod_poly_swap(tQ, Q);
+        nmod_poly_clear(tQ);
+    }
+
+    Q->length = A_len - B_len + 1;
+}
diff --git a/src/nmod_poly/find_distinct_nonzero_roots.c b/src/nmod_poly/find_distinct_nonzero_roots.c
index 7ed8a88e77..810c65b8f1 100644
--- a/src/nmod_poly/find_distinct_nonzero_roots.c
+++ b/src/nmod_poly/find_distinct_nonzero_roots.c
@@ -42,7 +42,7 @@ void _nmod_poly_split_rabin(
         goto try_again;
     }
 
-    nmod_poly_div(b, f, a);
+    nmod_poly_divexact(b, f, a);
 
     /* deg a >= deg b */
     if (nmod_poly_degree(a) < nmod_poly_degree(b))
diff --git a/src/nmod_poly/test/main.c b/src/nmod_poly/test/main.c
index 9e9704e519..86a91c45c7 100644
--- a/src/nmod_poly/test/main.c
+++ b/src/nmod_poly/test/main.c
@@ -40,6 +40,7 @@
 #include "t-derivative.c"
 #include "t-discriminant.c"
 #include "t-div.c"
+#include "t-divexact.c"
 #include "t-divides.c"
 #include "t-divides_classical.c"
 #include "t-div_newton_n_preinv.c"
@@ -163,6 +164,7 @@ test_struct tests[] =
     TEST_FUNCTION(nmod_poly_derivative),
     TEST_FUNCTION(nmod_poly_discriminant),
     TEST_FUNCTION(nmod_poly_div),
+    TEST_FUNCTION(nmod_poly_divexact),
     TEST_FUNCTION(nmod_poly_divides),
     TEST_FUNCTION(nmod_poly_divides_classical),
     TEST_FUNCTION(nmod_poly_div_newton_n_preinv),
diff --git a/src/nmod_poly/test/t-divexact.c b/src/nmod_poly/test/t-divexact.c
new file mode 100644
index 0000000000..3d99af6b09
--- /dev/null
+++ b/src/nmod_poly/test/t-divexact.c
@@ -0,0 +1,77 @@
+/*
+    Copyright (C) 2011 William Hart
+    Copyright (C) 2024 Fredrik Johansson
+
+    This file is part of FLINT.
+
+    FLINT is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 2.1 of the License, or
+    (at your option) any later version.  See <https://www.gnu.org/licenses/>.
+*/
+
+#include "test_helpers.h"
+#include "ulong_extras.h"
+#include "nmod_poly.h"
+
+TEST_FUNCTION_START(nmod_poly_divexact, state)
+{
+    int i, result;
+
+    for (i = 0; i < 100 * flint_test_multiplier(); i++)
+    {
+        nmod_poly_t a, b, ab, q;
+        int aliasing;
+
+        mp_limb_t n;
+        do n = n_randtest_not_zero(state);
+        while (!n_is_probabprime(n));
+
+        nmod_poly_init(a, n);
+        nmod_poly_init(b, n);
+        nmod_poly_init(ab, n);
+        nmod_poly_init(q, n);
+
+        nmod_poly_randtest(a, state, n_randint(state, 400));
+        do nmod_poly_randtest(b, state, n_randint(state, 400));
+        while (b->length == 0);
+
+        nmod_poly_mul(ab, a, b);
+
+        aliasing = n_randint(state, 3);
+
+        switch (aliasing)
+        {
+            case 0:
+                nmod_poly_divexact(q, ab, b);
+                break;
+            case 1:
+                nmod_poly_set(q, ab);
+                nmod_poly_divexact(q, q, b);
+                break;
+            default:
+                nmod_poly_set(q, b);
+                nmod_poly_divexact(q, ab, q);
+                break;
+        }
+
+        result = (nmod_poly_equal(q, a));
+        if (!result)
+        {
+            flint_printf("FAIL:\n");
+            nmod_poly_print(a), flint_printf("\n\n");
+            nmod_poly_print(b), flint_printf("\n\n");
+            nmod_poly_print(q), flint_printf("\n\n");
+            flint_printf("n = %wd\n", n);
+            fflush(stdout);
+            flint_abort();
+        }
+
+        nmod_poly_clear(a);
+        nmod_poly_clear(b);
+        nmod_poly_clear(ab);
+        nmod_poly_clear(q);
+    }
+
+    TEST_FUNCTION_END(state);
+}
diff --git a/src/nmod_poly_factor/factor_berlekamp.c b/src/nmod_poly_factor/factor_berlekamp.c
index f4882a5b12..098a80da71 100644
--- a/src/nmod_poly_factor/factor_berlekamp.c
+++ b/src/nmod_poly_factor/factor_berlekamp.c
@@ -210,7 +210,7 @@ __nmod_poly_factor_berlekamp(nmod_poly_factor_t factors,
 
         nmod_poly_init_mod(Q, f->mod);
 
-        nmod_poly_div(Q, f, g);
+        nmod_poly_divexact(Q, f, g);
 
         if (!nmod_poly_is_zero(Q))
             nmod_poly_make_monic(Q, Q);
diff --git a/src/nmod_poly_factor/factor_equal_deg.c b/src/nmod_poly_factor/factor_equal_deg.c
index b2e63d3ce4..e6300aa2b4 100644
--- a/src/nmod_poly_factor/factor_equal_deg.c
+++ b/src/nmod_poly_factor/factor_equal_deg.c
@@ -38,7 +38,7 @@ nmod_poly_factor_equal_deg(nmod_poly_factor_t factors,
         flint_randclear(state);
 
         nmod_poly_init_mod(g, pol->mod);
-        nmod_poly_div(g, pol, f);
+        nmod_poly_divexact(g, pol, f);
 
         nmod_poly_factor_equal_deg(factors, f, d);
         nmod_poly_clear(f);
diff --git a/src/nmod_poly_factor/factor_squarefree.c b/src/nmod_poly_factor/factor_squarefree.c
index 704e799e7c..e5a041bd25 100644
--- a/src/nmod_poly_factor/factor_squarefree.c
+++ b/src/nmod_poly_factor/factor_squarefree.c
@@ -76,7 +76,7 @@ nmod_poly_factor_squarefree(nmod_poly_factor_t res, const nmod_poly_t f)
         nmod_poly_t h, z;
 
         nmod_poly_gcd(g, f, f_d);
-        nmod_poly_div(g_1, f, g);
+        nmod_poly_divexact(g_1, f, g);
 
         i = 1;
 
@@ -87,7 +87,7 @@ nmod_poly_factor_squarefree(nmod_poly_factor_t res, const nmod_poly_t f)
         while (!nmod_poly_is_one(g_1))
         {
             nmod_poly_gcd(h, g_1, g);
-            nmod_poly_div(z, g_1, h);
+            nmod_poly_divexact(z, g_1, h);
 
             /* out <- out.z */
             if (z->length > 1)
@@ -101,7 +101,7 @@ nmod_poly_factor_squarefree(nmod_poly_factor_t res, const nmod_poly_t f)
 
             i++;
             nmod_poly_set(g_1, h);
-            nmod_poly_div(g, g, h);
+            nmod_poly_divexact(g, g, h);
         }
 
         nmod_poly_clear(h);
diff --git a/src/nmod_poly_mat/fflu.c b/src/nmod_poly_mat/fflu.c
index b9f29f775e..c81b161235 100644
--- a/src/nmod_poly_mat/fflu.c
+++ b/src/nmod_poly_mat/fflu.c
@@ -84,7 +84,7 @@ nmod_poly_mat_fflu(nmod_poly_mat_t B, nmod_poly_t den, slong * perm,
                 nmod_poly_mul(t, E(j, pivot_col), E(pivot_row, k));
                 nmod_poly_sub(E(j, k), E(j, k), t);
                 if (pivot_row > 0)
-                    nmod_poly_div(E(j, k), E(j, k), den);
+                    nmod_poly_divexact(E(j, k), E(j, k), den);
             }
         }
 
diff --git a/src/nmod_poly_mat/rref.c b/src/nmod_poly_mat/rref.c
index 2bb10ddf2e..811e9b6ab3 100644
--- a/src/nmod_poly_mat/rref.c
+++ b/src/nmod_poly_mat/rref.c
@@ -70,7 +70,7 @@ nmod_poly_mat_rref(nmod_poly_mat_t R, nmod_poly_t den, const nmod_poly_mat_t A)
                     nmod_poly_sub(tmp, tmp, tmp2);
                 }
 
-                nmod_poly_div(nmod_poly_mat_entry(R, i, nonpivots[k]),
+                nmod_poly_divexact(nmod_poly_mat_entry(R, i, nonpivots[k]),
                     tmp, nmod_poly_mat_entry(R, i, pivots[i]));
             }
         }
diff --git a/src/nmod_poly_mat/solve_fflu_precomp.c b/src/nmod_poly_mat/solve_fflu_precomp.c
index 26bc9630ce..b5689cbc72 100644
--- a/src/nmod_poly_mat/solve_fflu_precomp.c
+++ b/src/nmod_poly_mat/solve_fflu_precomp.c
@@ -65,7 +65,7 @@ nmod_poly_mat_solve_fflu_precomp(nmod_poly_mat_t X,
                 nmod_poly_mul(T, LU(j, i), XX(i, k));
                 nmod_poly_sub(XX(j, k), XX(j, k), T);
                 if (i > 0)
-                    nmod_poly_div(XX(j, k), XX(j, k), LU(i-1, i-1));
+                    nmod_poly_divexact(XX(j, k), XX(j, k), LU(i-1, i-1));
             }
         }
 
@@ -78,7 +78,7 @@ nmod_poly_mat_solve_fflu_precomp(nmod_poly_mat_t X,
                 nmod_poly_mul(T, XX(j, k), LU(i, j));
                 nmod_poly_sub(XX(i, k), XX(i, k), T);
             }
-            nmod_poly_div(XX(i, k), XX(i, k), LU(i, i));
+            nmod_poly_divexact(XX(i, k), XX(i, k), LU(i, i));
         }
     }