Skip to content

Commit

Permalink
Optimize atomic float on NVPTX
Browse files Browse the repository at this point in the history
  • Loading branch information
taiki-e committed Jun 4, 2023
1 parent 8fcbe7c commit 5380572
Show file tree
Hide file tree
Showing 7 changed files with 459 additions and 7 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ See the [`atomic128` module's readme](https://github.com/taiki-e/portable-atomic
- <a name="optional-features-float"></a>**`float`**<br>
Provide `AtomicF{32,64}`.

Note that most of `fetch_*` operations of atomic floats are implemented using CAS loops, which can be slower than equivalent operations of atomic integers. ([GPU targets have atomic instructions for float, so we plan to use these instructions for GPU targets in the future.](https://github.com/taiki-e/portable-atomic/issues/34))
Note that most `fetch_*` operations on atomic floats are implemented using CAS loops, which can be slower than the equivalent operations on atomic integers. (GPU targets have atomic instructions for floats, so we use these instructions for GPU targets on nightly.)

- **`std`**<br>
Use `std`.
Expand Down
33 changes: 32 additions & 1 deletion build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,8 @@ fn main() {
// https://github.com/rust-lang/rust/pull/111331 merged in Rust 1.71 (nightly-2023-05-09).
if !no_asm
&& (target_arch == "powerpc64" && version.probe(60, 2022, 2, 13)
|| target_arch == "s390x" && version.probe(71, 2023, 5, 8))
|| target_arch == "s390x" && version.probe(71, 2023, 5, 8)
|| target_arch == "nvptx64")
&& is_allowed_feature("asm_experimental_arch")
{
println!("cargo:rustc-cfg=portable_atomic_unstable_asm_experimental_arch");
Expand Down Expand Up @@ -318,6 +319,36 @@ fn main() {
false,
);
}
"nvptx64" => {
// Detect whether an `sm_N` target feature with N >= 70 was requested through
// the rustflags that Cargo passes to this build script.
// NOTE(review): only `target-feature=` entries are inspected here; a
// `target-cpu=sm_N` flag is not looked at.
let mut has_sm_70 = false;
if let Some(rustflags) = env::var_os("CARGO_ENCODED_RUSTFLAGS") {
// Individual flags in CARGO_ENCODED_RUSTFLAGS are separated by the 0x1f byte.
for mut flag in rustflags.to_string_lossy().split('\x1f') {
// Drop a leading `-C` so that `-Ctarget-feature=...` and a bare
// `target-feature=...` (the entry following a standalone `-C`) are both matched.
// NOTE(review): `strip_prefix` is a helper defined outside this excerpt;
// presumably it mirrors `str::strip_prefix`.
flag = strip_prefix(flag, "-C").unwrap_or(flag);
if let Some(flag) = strip_prefix(flag, "target-feature=") {
// The value is a comma-separated list of `+feature` / `-feature` entries.
for s in flag.split(',') {
// TODO: Handle cases where a specific target feature
// implicitly enables another target feature.
// Split each entry into its sign (first byte) and the feature name (the rest).
match (s.as_bytes().first(), s.get(1..)) {
(Some(b'+'), Some(f)) => {
// `+sm_N`: any N that parses as an integer >= 70 enables the sm_70 path.
if let Some(sm) = strip_prefix(f, "sm_") {
if let Ok(sm) = sm.parse::<u32>() {
if sm >= 70 {
has_sm_70 = true;
}
}
}
}
(Some(b'-'), Some(_f)) => {
// TODO: disabling a feature (`-sm_N`) is currently ignored, so it
// never resets `has_sm_70` once an earlier entry has set it.
}
// Empty entries and entries without a `+`/`-` sign are ignored.
_ => {}
}
}
}
}
}
// Hand the result to `target_feature_if` (defined outside this excerpt).
// NOTE(review): presumably this emits the `portable_atomic_target_feature = "sm_70"`
// cfg that src/imp/mod.rs checks; confirm against the helper.
target_feature_if("sm_70", has_sm_70, &version, None, false);
}
_ => {}
}
}
Expand Down
5 changes: 2 additions & 3 deletions src/imp/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
// Note that most of `fetch_*` operations of atomic floats are implemented using
// CAS loops, which can be slower than equivalent operations of atomic integers.
//
// GPU targets have atomic instructions for float, so GPU targets will use
// architecture-specific implementations instead of this implementation in the
// future: https://github.com/taiki-e/portable-atomic/issues/34
// GPU targets have atomic instructions for float, so we use these instructions
// for GPU targets on nightly (see nvptx.rs).

#![cfg_attr(
all(target_pointer_width = "16", not(feature = "fallback")),
Expand Down
22 changes: 22 additions & 0 deletions src/imp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,15 @@ mod arm_linux;
#[cfg(target_arch = "msp430")]
pub(crate) mod msp430;

// Atomic float implementation for NVPTX (see nvptx.rs).
//
// Built only when the `float` feature is enabled and the target is nvptx64
// with `sm_70` (reported by rustc via `target_feature` or through the
// `portable_atomic_target_feature` cfg) and the unstable
// `asm_experimental_arch` feature is usable.
//
// The predicate is written exactly like the one on the `use nvptx as float`
// re-export further down in this file, so the two always agree.
#[cfg(feature = "float")]
#[cfg(all(
    target_arch = "nvptx64",
    any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
    portable_atomic_unstable_asm_experimental_arch,
))]
pub(crate) mod nvptx;

#[cfg(any(test, not(feature = "critical-section")))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_cas)))]
#[cfg_attr(
Expand Down Expand Up @@ -199,8 +208,21 @@ mod interrupt;
// Atomic float implementations

// Default atomic float implementation (float.rs), used whenever the
// NVPTX-specific implementation is not selected. The predicate below is the
// exact negation of the one on the `use nvptx as float` item, so exactly one
// `float` is defined when the `float` feature is enabled.
#[cfg(feature = "float")]
#[cfg(not(all(
target_arch = "nvptx64",
any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
portable_atomic_unstable_asm_experimental_arch,
)))]
pub(crate) mod float;

// On nvptx64 with `sm_70` and `asm_experimental_arch` available, expose the
// `nvptx` module under the name `float` instead, so the rest of the crate can
// refer to `float` regardless of which implementation is in use.
#[cfg(feature = "float")]
#[cfg(all(
target_arch = "nvptx64",
any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
portable_atomic_unstable_asm_experimental_arch,
))]
pub(crate) use nvptx as float;

// -----------------------------------------------------------------------------

// Atomic{Isize,Usize,Bool,Ptr}, Atomic{I,U}{8,16}
Expand Down
Loading

0 comments on commit 5380572

Please sign in to comment.