Merge pull request #357 from rust-lang/subnormals

Fix subnormals
rust-lang · Sep 10, 2023 · b6e03e5 · b6e03e5
2 parents 6d10cd1 + 5e57453
commit b6e03e5
Show file tree

Hide file tree

Showing 8 changed files with 316 additions and 40 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -167,40 +167,33 @@ jobs:
             RUSTFLAGS: ${{ matrix.rustflags }}
 
   cross-tests:
-    name: "${{ matrix.target }} (via cross)"
+    name: "${{ matrix.target_feature }} on ${{ matrix.target }} (via cross)"
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
-      # TODO: Sadly, we cant configure target-feature in a meaningful way
-      # because `cross` doesn't tell qemu to enable any non-default cpu
-      # features, nor does it give us a way to do so.
-      #
-      # Ultimately, we'd like to do something like [rust-lang/stdarch][stdarch].
-      # This is a lot more complex... but in practice it's likely that we can just
-      # snarf the docker config from around [here][1000-dockerfiles].
-      #
-      # [stdarch]: https://github.com/rust-lang/stdarch/blob/a5db4eaf/.github/workflows/main.yml#L67
-      # [1000-dockerfiles]: https://github.com/rust-lang/stdarch/tree/a5db4eaf/ci/docker
 
       matrix:
         target:
-          - i586-unknown-linux-gnu
-          # 32-bit arm has a few idiosyncracies like having subnormal flushing
-          # to zero on by default. Ideally we'd set
           - armv7-unknown-linux-gnueabihf
-          - aarch64-unknown-linux-gnu
-          # Note: The issue above means neither of these mips targets will use
-          # MSA (mips simd) but MIPS uses a nonstandard binary representation
-          # for NaNs which makes it worth testing on despite that.
+          - thumbv7neon-unknown-linux-gnueabihf # includes neon by default
+          - aarch64-unknown-linux-gnu           # includes neon by default
+          - powerpc-unknown-linux-gnu
+          - powerpc64le-unknown-linux-gnu       # includes altivec by default
+          - riscv64gc-unknown-linux-gnu
+          # MIPS uses a nonstandard binary representation for NaNs which makes it worth testing
+          # non-nightly since https://github.com/rust-lang/rust/pull/113274
           # - mips-unknown-linux-gnu
           # - mips64-unknown-linux-gnuabi64
-          - riscv64gc-unknown-linux-gnu
-          # TODO this test works, but it appears to time out
-          # - powerpc-unknown-linux-gnu
-          # TODO this test is broken, but it appears to be a problem with QEMU, not us.
-          # - powerpc64le-unknown-linux-gnu
-          # TODO enable this once a new version of cross is released
+          # Lots of errors in QEMU and no real hardware to test on. Not clear if it's QEMU or bad codegen.
           # - powerpc64-unknown-linux-gnu
+        target_feature: [default]
+        include:
+          - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
+          # Fails due to QEMU floating point errors, probably handling subnormals incorrectly.
+          # This target is somewhat redundant, since ppc64le has altivec as well.
+          # - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" }
+          # We should test this, but cross currently can't run it
+          # - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }
 
     steps:
       - uses: actions/checkout@v2
@@ -217,11 +210,27 @@ jobs:
         # being part of the tarball means we can't just use the download/latest
         # URL :(
         run: |
-          CROSS_URL=https://github.com/rust-embedded/cross/releases/download/v0.2.1/cross-v0.2.1-x86_64-unknown-linux-gnu.tar.gz
+          CROSS_URL=https://github.com/cross-rs/cross/releases/download/v0.2.5/cross-x86_64-unknown-linux-gnu.tar.gz
           mkdir -p "$HOME/.bin"
           curl -sfSL --retry-delay 10 --retry 5 "${CROSS_URL}" | tar zxf - -C "$HOME/.bin"
           echo "$HOME/.bin" >> $GITHUB_PATH
 
+      - name: Configure Emulated CPUs
+        run: |
+          echo "CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc -cpu e600" >> $GITHUB_ENV
+          # echo "CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64 -cpu rv64,zba=true,zbb=true,v=true,vlen=256,vext_spec=v1.0" >> $GITHUB_ENV
+
+      - name: Configure RUSTFLAGS
+        shell: bash
+        run: |
+          case "${{ matrix.target_feature }}" in
+            default)
+              echo "RUSTFLAGS=" >> $GITHUB_ENV;;
+            *)
+              echo "RUSTFLAGS=-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV
+              ;;
+          esac
+
       - name: Test (debug)
         run: cross test --verbose --target=${{ matrix.target }}
 

diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs
@@ -336,7 +336,10 @@ macro_rules! impl_trait {
 
             #[inline]
             fn is_subnormal(self) -> Self::Mask {
-                self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0))
+                // On some architectures (e.g. armv7 and some ppc) subnormals are flushed to zero,
+                // so this comparison must be done with integers.
+                let not_zero = self.abs().to_bits().simd_ne(Self::splat(0.0).to_bits());
+                not_zero & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0))
             }
 
             #[inline]

diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
@@ -6,7 +6,7 @@ macro_rules! impl_unary_op_test {
     { $scalar:ty, $trait:ident :: $fn:ident, $scalar_fn:expr } => {
         test_helpers::test_lanes! {
             fn $fn<const LANES: usize>() {
-                test_helpers::test_unary_elementwise(
+                test_helpers::test_unary_elementwise_flush_subnormals(
                     &<core_simd::simd::Simd<$scalar, LANES> as core::ops::$trait>::$fn,
                     &$scalar_fn,
                     &|_| true,
@@ -31,15 +31,15 @@ macro_rules! impl_binary_op_test {
 
             test_helpers::test_lanes! {
                 fn normal<const LANES: usize>() {
-                    test_helpers::test_binary_elementwise(
+                    test_helpers::test_binary_elementwise_flush_subnormals(
                         &<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
                         &$scalar_fn,
                         &|_, _| true,
                     );
                 }
 
                 fn assign<const LANES: usize>() {
-                    test_helpers::test_binary_elementwise(
+                    test_helpers::test_binary_elementwise_flush_subnormals(
                         &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
                         &$scalar_fn,
                         &|_, _| true,
@@ -126,19 +126,23 @@ macro_rules! impl_common_integer_tests {
 
             fn reduce_sum<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
+                    use test_helpers::subnormals::{flush, flush_in};
                     test_helpers::prop_assert_biteq! (
                         $vector::<LANES>::from_array(x).reduce_sum(),
                         x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
+                        flush(x.iter().copied().map(flush_in).fold(0 as $scalar, $scalar::wrapping_add)),
                     );
                     Ok(())
                 });
             }
 
             fn reduce_product<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
+                    use test_helpers::subnormals::{flush, flush_in};
                     test_helpers::prop_assert_biteq! (
                         $vector::<LANES>::from_array(x).reduce_product(),
                         x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
+                        flush(x.iter().copied().map(flush_in).fold(1 as $scalar, $scalar::wrapping_mul)),
                     );
                     Ok(())
                 });
@@ -463,15 +467,15 @@ macro_rules! impl_float_tests {
                 }
 
                 fn to_degrees<const LANES: usize>() {
-                    test_helpers::test_unary_elementwise(
+                    test_helpers::test_unary_elementwise_flush_subnormals(
                         &Vector::<LANES>::to_degrees,
                         &Scalar::to_degrees,
                         &|_| true,
                     )
                 }
 
                 fn to_radians<const LANES: usize>() {
-                    test_helpers::test_unary_elementwise(
+                    test_helpers::test_unary_elementwise_flush_subnormals(
                         &Vector::<LANES>::to_radians,
                         &Scalar::to_radians,
                         &|_| true,
@@ -541,7 +545,12 @@ macro_rules! impl_float_tests {
                 }
 
                 fn simd_clamp<const LANES: usize>() {
+                    if cfg!(all(target_arch = "powerpc64", target_feature = "vsx")) {
+                        // https://gitlab.com/qemu-project/qemu/-/issues/1780
+                        return;
+                    }
                     test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
+                        use test_helpers::subnormals::flush_in;
                         for (min, max) in min.iter_mut().zip(max.iter_mut()) {
                             if max < min {
                                 core::mem::swap(min, max);
@@ -558,8 +567,20 @@ macro_rules! impl_float_tests {
                         for i in 0..LANES {
                             result_scalar[i] = value[i].clamp(min[i], max[i]);
                         }
+                        let mut result_scalar_flush = [Scalar::default(); LANES];
+                        for i in 0..LANES {
+                            // Comparisons flush-to-zero, but return value selection is _not_ flushed.
+                            let mut value = value[i];
+                            if flush_in(value) < flush_in(min[i]) {
+                                value = min[i];
+                            }
+                            if flush_in(value) > flush_in(max[i]) {
+                                value = max[i];
+                            }
+                            result_scalar_flush[i] = value
+                        }
                         let result_vector = Vector::from_array(value).simd_clamp(min.into(), max.into()).to_array();
-                        test_helpers::prop_assert_biteq!(result_scalar, result_vector);
+                        test_helpers::prop_assert_biteq!(result_vector, result_scalar, result_scalar_flush);
                         Ok(())
                     })
                 }

diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs
@@ -43,7 +43,7 @@ macro_rules! float_rounding_test {
                 }
 
                 fn fract<const LANES: usize>() {
-                    test_helpers::test_unary_elementwise(
+                    test_helpers::test_unary_elementwise_flush_subnormals(
                         &Vector::<LANES>::fract,
                         &Scalar::fract,
                         &|_| true,

diff --git a/crates/test_helpers/Cargo.toml b/crates/test_helpers/Cargo.toml
@@ -4,10 +4,8 @@ version = "0.1.0"
 edition = "2021"
 publish = false
 
-[dependencies.proptest]
-version = "0.10"
-default-features = false
-features = ["alloc"]
+[dependencies]
+proptest = { version = "0.10", default-features = false, features = ["alloc"] }
 
 [features]
 all_lane_counts = []
diff --git a/crates/test_helpers/src/biteq.rs b/crates/test_helpers/src/biteq.rs
@@ -113,6 +113,27 @@ impl<T: BitEq> core::fmt::Debug for BitEqWrapper<'_, T> {
     }
 }
 
+#[doc(hidden)]
+pub struct BitEqEitherWrapper<'a, T>(pub &'a T, pub &'a T);
+
+impl<T: BitEq> PartialEq<BitEqEitherWrapper<'_, T>> for BitEqWrapper<'_, T> {
+    fn eq(&self, other: &BitEqEitherWrapper<'_, T>) -> bool {
+        self.0.biteq(other.0) || self.0.biteq(other.1)
+    }
+}
+
+impl<T: BitEq> core::fmt::Debug for BitEqEitherWrapper<'_, T> {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        if self.0.biteq(self.1) {
+            self.0.fmt(f)
+        } else {
+            self.0.fmt(f)?;
+            write!(f, " or ")?;
+            self.1.fmt(f)
+        }
+    }
+}
+
 #[macro_export]
 macro_rules! prop_assert_biteq {
     { $a:expr, $b:expr $(,)? } => {
@@ -122,5 +143,14 @@ macro_rules! prop_assert_biteq {
             let b = $b;
             proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqWrapper(&b));
         }
-    }
+    };
+    { $a:expr, $b:expr, $c:expr $(,)? } => {
+        {
+            use $crate::biteq::{BitEqWrapper, BitEqEitherWrapper};
+            let a = $a;
+            let b = $b;
+            let c = $c;
+            proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqEitherWrapper(&b, &c));
+        }
+    };
 }