From 675a9d29535f3071cd3ecc547ef56f604305ad20 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 11:58:17 +0000
Subject: [PATCH 1/3] Shift then mask, rather than mask then shift

This may allow for small optimizations with larger float types since
`u32` math can be used after shifting. LLVM may be already getting this
anyway.
---
 src/math/support/float_traits.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 3aa0d844..c72046f8 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -1,6 +1,6 @@
 use core::{fmt, mem, ops};
 
-use super::int_traits::{CastFrom, CastInto, Int, MinInt};
+use super::int_traits::{CastFrom, Int, MinInt};
 
 /// Trait for some basic operations on floats
 #[allow(dead_code)]
@@ -103,7 +103,7 @@ pub trait Float:
 
     /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero.
     fn exp(self) -> i32 {
-        ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS).cast()
+        (u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_MAX).signed()
     }
 
     /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero.

From 1a60db7e5817f4dd603fcc466f631685662ffb4d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 11:17:03 +0000
Subject: [PATCH 2/3] Add a generic version of `ceil`

---
 crates/libm-test/src/f8_impl.rs  |  2 ++
 src/math/ceil.rs                 | 42 +-------------------------
 src/math/ceilf.rs                | 51 +-------------------------------
 src/math/generic/mod.rs          |  2 ++
 src/math/support/float_traits.rs |  2 ++
 5 files changed, 8 insertions(+), 91 deletions(-)

diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs
index 299553d2..96b78392 100644
--- a/crates/libm-test/src/f8_impl.rs
+++ b/crates/libm-test/src/f8_impl.rs
@@ -30,6 +30,8 @@ impl Float for f8 {
     const INFINITY: Self = Self(0b0_1111_000);
     const NEG_INFINITY: Self = Self(0b1_1111_000);
     const NAN: Self = Self(0b0_1111_100);
+    // FIXME: incorrect values
+    const EPSILON: Self = Self::ZERO;
     const PI: Self = Self::ZERO;
     const NEG_PI: Self = Self::ZERO;
     const FRAC_PI_2: Self = Self::ZERO;
diff --git a/src/math/ceil.rs b/src/math/ceil.rs
index 398bfee4..535f434a 100644
--- a/src/math/ceil.rs
+++ b/src/math/ceil.rs
@@ -1,8 +1,3 @@
-#![allow(unreachable_code)]
-use core::f64;
-
-const TOINT: f64 = 1. / f64::EPSILON;
-
 /// Ceil (f64)
 ///
 /// Finds the nearest integer greater than or equal to `x`.
@@ -15,40 +10,5 @@ pub fn ceil(x: f64) -> f64 {
         args: x,
     }
 
-    let u: u64 = x.to_bits();
-    let e: i64 = ((u >> 52) & 0x7ff) as i64;
-    let y: f64;
-
-    if e >= 0x3ff + 52 || x == 0. {
-        return x;
-    }
-    // y = int(x) - x, where int(x) is an integer neighbor of x
-    y = if (u >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x };
-    // special case because of non-nearest rounding modes
-    if e < 0x3ff {
-        force_eval!(y);
-        return if (u >> 63) != 0 { -0. } else { 1. };
-    }
-    if y < 0. { x + y + 1. } else { x + y }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(ceil(1.1), 2.0);
-        assert_eq!(ceil(2.9), 3.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: that the current rounding mode has no effect.
-        assert!(ceil(f64::NAN).is_nan());
-        for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY].iter().copied() {
-            assert_eq!(ceil(f), f);
-        }
-    }
+    super::generic::ceil(x)
 }
diff --git a/src/math/ceilf.rs b/src/math/ceilf.rs
index 9e8e78e3..66d44189 100644
--- a/src/math/ceilf.rs
+++ b/src/math/ceilf.rs
@@ -1,5 +1,3 @@
-use core::f32;
-
 /// Ceil (f32)
 ///
 /// Finds the nearest integer greater than or equal to `x`.
@@ -11,52 +9,5 @@ pub fn ceilf(x: f32) -> f32 {
         args: x,
     }
 
-    let mut ui = x.to_bits();
-    let e = (((ui >> 23) & 0xff).wrapping_sub(0x7f)) as i32;
-
-    if e >= 23 {
-        return x;
-    }
-    if e >= 0 {
-        let m = 0x007fffff >> e;
-        if (ui & m) == 0 {
-            return x;
-        }
-        force_eval!(x + f32::from_bits(0x7b800000));
-        if ui >> 31 == 0 {
-            ui += m;
-        }
-        ui &= !m;
-    } else {
-        force_eval!(x + f32::from_bits(0x7b800000));
-        if ui >> 31 != 0 {
-            return -0.0;
-        } else if ui << 1 != 0 {
-            return 1.0;
-        }
-    }
-    f32::from_bits(ui)
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(ceilf(1.1), 2.0);
-        assert_eq!(ceilf(2.9), 3.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: that the current rounding mode has no effect.
-        assert!(ceilf(f32::NAN).is_nan());
-        for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY].iter().copied() {
-            assert_eq!(ceilf(f), f);
-        }
-    }
+    super::generic::ceil(x)
 }
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index e5166ca1..ee1877ea 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -1,7 +1,9 @@
+mod ceil;
 mod copysign;
 mod fabs;
 mod trunc;
 
+pub use ceil::ceil;
 pub use copysign::copysign;
 pub use fabs::fabs;
 pub use trunc::trunc;
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index c72046f8..c8119dd0 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -34,6 +34,7 @@ pub trait Float:
     const NAN: Self;
     const MAX: Self;
     const MIN: Self;
+    const EPSILON: Self;
     const PI: Self;
     const NEG_PI: Self;
     const FRAC_PI_2: Self;
@@ -175,6 +176,7 @@ macro_rules! float_impl {
             const MAX: Self = -Self::MIN;
             // Sign bit set, saturated mantissa, saturated exponent with last bit zeroed
             const MIN: Self = $from_bits(Self::Int::MAX & !(1 << Self::SIG_BITS));
+            const EPSILON: Self = <$ty>::EPSILON;
 
             const PI: Self = core::$ty::consts::PI;
             const NEG_PI: Self = -Self::PI;

From 21c19b5486947620c7f8d8425d517c82fa197974 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 11:17:17 +0000
Subject: [PATCH 3/3] Add `ceilf16` and `ceilf128`

Use the generic algorithms to provide implementations for these
routines.
---
 crates/libm-macros/src/shared.rs             |  4 +-
 crates/libm-test/benches/random.rs           |  9 ++-
 crates/libm-test/src/domain.rs               | 10 +++
 crates/libm-test/src/mpfloat.rs              |  4 ++
 crates/libm-test/tests/compare_built_musl.rs | 17 ++++-
 crates/util/src/main.rs                      |  9 ++-
 etc/function-definitions.json                | 20 +++++-
 etc/function-list.txt                        |  2 +
 src/math/ceilf128.rs                         |  7 ++
 src/math/ceilf16.rs                          |  7 ++
 src/math/generic/ceil.rs                     | 70 ++++++++++++++++++++
 src/math/mod.rs                              |  4 ++
 12 files changed, 154 insertions(+), 9 deletions(-)
 create mode 100644 src/math/ceilf128.rs
 create mode 100644 src/math/ceilf16.rs
 create mode 100644 src/math/generic/ceil.rs

diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index 24fccd6f..938a7181 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F16,
         Signature { args: &[Ty::F16], returns: &[Ty::F16] },
         None,
-        &["fabsf16", "truncf16"],
+        &["ceilf16", "fabsf16", "truncf16"],
     ),
     (
         // `fn(f32) -> f32`
@@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F128,
         Signature { args: &[Ty::F128], returns: &[Ty::F128] },
         None,
-        &["fabsf128", "truncf128"],
+        &["ceilf128", "fabsf128", "truncf128"],
     ),
     (
         // `(f16, f16) -> f16`
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 8c6afff2..e612f233 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -117,7 +117,14 @@ libm_macros::for_each_function! {
         exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)),
 
         // Musl does not provide `f16` and `f128` functions
-        copysignf16 | copysignf128 | fabsf16 | fabsf128 | truncf16 | truncf128  => (false, None),
+        ceilf128
+        | ceilf16
+        | copysignf128
+        | copysignf16
+        | fabsf128
+        | fabsf16
+        | truncf128
+        | truncf16 => (false, None),
 
         // By default we never skip (false) and always have a musl function available
         _ => (false, Some(musl_math_sys::MACRO_FN_NAME))
diff --git a/crates/libm-test/src/domain.rs b/crates/libm-test/src/domain.rs
index adafb9fa..749a2594 100644
--- a/crates/libm-test/src/domain.rs
+++ b/crates/libm-test/src/domain.rs
@@ -200,6 +200,16 @@ impl HasDomain<f128> for crate::op::fabsf128::Routine {
     const DOMAIN: Domain<f128> = Domain::<f128>::UNBOUNDED;
 }
 
+#[cfg(f16_enabled)]
+impl HasDomain<f16> for crate::op::ceilf16::Routine {
+    const DOMAIN: Domain<f16> = Domain::<f16>::UNBOUNDED;
+}
+
+#[cfg(f128_enabled)]
+impl HasDomain<f128> for crate::op::ceilf128::Routine {
+    const DOMAIN: Domain<f128> = Domain::<f128>::UNBOUNDED;
+}
+
 #[cfg(f16_enabled)]
 impl HasDomain<f16> for crate::op::truncf16::Routine {
     const DOMAIN: Domain<f16> = Domain::<f16>::UNBOUNDED;
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index a4aad81f..4e183bfc 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -137,6 +137,8 @@ libm_macros::for_each_function! {
         // Most of these need a manual implementation
         ceil,
         ceilf,
+        ceilf128,
+        ceilf16,
         copysign,
         copysignf,
         copysignf128,
@@ -237,12 +239,14 @@ impl_no_round! {
 #[cfg(f16_enabled)]
 impl_no_round! {
     fabsf16 => abs_mut;
+    ceilf16 => ceil_mut;
     truncf16 => trunc_mut;
 }
 
 #[cfg(f128_enabled)]
 impl_no_round! {
     fabsf128 => abs_mut;
+    ceilf128 => ceil_mut;
     truncf128 => trunc_mut;
 }
 
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index a395c6c5..49ed01e5 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -48,7 +48,16 @@ where
 libm_macros::for_each_function! {
     callback: musl_rand_tests,
     // Musl does not support `f16` and `f128` on all platforms.
-    skip: [copysignf16, copysignf128, fabsf16, fabsf128, truncf16, truncf128],
+    skip: [
+        ceilf128,
+        ceilf16,
+        copysignf128,
+        copysignf16,
+        fabsf128,
+        fabsf16,
+        truncf128,
+        truncf16,
+    ],
     attributes: [
         #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586
         [exp10, exp10f, exp2, exp2f, rint]
@@ -144,9 +153,11 @@ libm_macros::for_each_function! {
         ynf,
 
         // Not provided by musl
-        fabsf16,
+        ceilf128,
+        ceilf16,
         fabsf128,
-        truncf16,
+        fabsf16,
         truncf128,
+        truncf16,
     ],
 }
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index c8a03068..bc132848 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -84,7 +84,14 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
         emit_types: [CFn, RustFn, RustArgs],
         extra: (basis, op, inputs),
         fn_extra: match MACRO_FN_NAME {
-            copysignf16 | copysignf128 | fabsf16 | fabsf128 | truncf16 | truncf128  => None,
+            ceilf128
+            | ceilf16
+            | copysignf128
+            | copysignf16
+            | fabsf128
+            | fabsf16
+            | truncf128
+            | truncf16 => None,
             _ => Some(musl_math_sys::MACRO_FN_NAME)
         }
     }
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 86fa0210..3a809771 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -109,17 +109,33 @@
             "src/libm_helper.rs",
             "src/math/arch/i586.rs",
             "src/math/arch/wasm32.rs",
-            "src/math/ceil.rs"
+            "src/math/ceil.rs",
+            "src/math/generic/ceil.rs"
         ],
         "type": "f64"
     },
     "ceilf": {
         "sources": [
             "src/math/arch/wasm32.rs",
-            "src/math/ceilf.rs"
+            "src/math/ceilf.rs",
+            "src/math/generic/ceil.rs"
         ],
         "type": "f32"
     },
+    "ceilf128": {
+        "sources": [
+            "src/math/ceilf128.rs",
+            "src/math/generic/ceil.rs"
+        ],
+        "type": "f128"
+    },
+    "ceilf16": {
+        "sources": [
+            "src/math/ceilf16.rs",
+            "src/math/generic/ceil.rs"
+        ],
+        "type": "f16"
+    },
     "copysign": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index 8aa90176..87672a9e 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -17,6 +17,8 @@ cbrt
 cbrtf
 ceil
 ceilf
+ceilf128
+ceilf16
 copysign
 copysignf
 copysignf128
diff --git a/src/math/ceilf128.rs b/src/math/ceilf128.rs
new file mode 100644
index 00000000..89980858
--- /dev/null
+++ b/src/math/ceilf128.rs
@@ -0,0 +1,7 @@
+/// Ceil (f128)
+///
+/// Finds the nearest integer greater than or equal to `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ceilf128(x: f128) -> f128 {
+    super::generic::ceil(x)
+}
diff --git a/src/math/ceilf16.rs b/src/math/ceilf16.rs
new file mode 100644
index 00000000..2af67eff
--- /dev/null
+++ b/src/math/ceilf16.rs
@@ -0,0 +1,7 @@
+/// Ceil (f16)
+///
+/// Finds the nearest integer greater than or equal to `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ceilf16(x: f16) -> f16 {
+    super::generic::ceil(x)
+}
diff --git a/src/math/generic/ceil.rs b/src/math/generic/ceil.rs
new file mode 100644
index 00000000..ce5be8e7
--- /dev/null
+++ b/src/math/generic/ceil.rs
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: MIT
+ * origin: musl src/math/ceil.c */
+
+use super::super::{CastInto, Float};
+
+pub fn ceil<F: Float>(x: F) -> F {
+    let toint = F::ONE / F::EPSILON;
+
+    // NB: using `exp` here and comparing to values adjusted by `EXP_BIAS` has better
+    // perf than using `exp_unbiased` here.
+    let e = x.exp();
+    let y: F;
+
+    // If the represented value has no fractional part, no truncation is needed.
+    if e >= (F::SIG_BITS + F::EXP_BIAS).cast() || x == F::ZERO {
+        return x;
+    }
+
+    let neg = x.is_sign_negative();
+
+    // y = int(x) - x, where int(x) is an integer neighbor of x.
+    // The `x - t + t - x` method is a way to expose non-round-to-even modes.
+    y = if neg { x - toint + toint - x } else { x + toint - toint - x };
+
+    // Exp < 0;  special case because of non-nearest rounding modes
+    if e < F::EXP_BIAS.cast() {
+        // Raise `FE_INEXACT`
+        force_eval!(y);
+        return if neg { F::NEG_ZERO } else { F::ONE };
+    }
+
+    if y < F::ZERO { x + y + F::ONE } else { x + y }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(ceil(1.1f64), 2.0);
+        assert_eq!(ceil(2.9f64), 3.0);
+    }
+
+    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil
+    #[test]
+    fn spec_tests_f64() {
+        // Not Asserted: that the current rounding mode has no effect.
+        assert!(ceil(f64::NAN).is_nan());
+        for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY].iter().copied() {
+            assert_eq!(ceil(f), f);
+        }
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(ceil(1.1f32), 2.0);
+        assert_eq!(ceil(2.9f32), 3.0);
+    }
+
+    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil
+    #[test]
+    fn spec_tests_f32() {
+        // Not Asserted: that the current rounding mode has no effect.
+        assert!(ceil(f32::NAN).is_nan());
+        for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY].iter().copied() {
+            assert_eq!(ceil(f), f);
+        }
+    }
+}
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 723be0e1..de4f3b05 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -341,10 +341,12 @@ pub use self::truncf::truncf;
 
 cfg_if! {
     if #[cfg(f16_enabled)] {
+        mod ceilf16;
         mod copysignf16;
         mod fabsf16;
         mod truncf16;
 
+        pub use self::ceilf16::ceilf16;
         pub use self::copysignf16::copysignf16;
         pub use self::fabsf16::fabsf16;
         pub use self::truncf16::truncf16;
@@ -353,10 +355,12 @@ cfg_if! {
 
 cfg_if! {
     if #[cfg(f128_enabled)] {
+        mod ceilf128;
         mod copysignf128;
         mod fabsf128;
         mod truncf128;
 
+        pub use self::ceilf128::ceilf128;
         pub use self::copysignf128::copysignf128;
         pub use self::fabsf128::fabsf128;
         pub use self::truncf128::truncf128;