Replace GCC __ATOMIC* built-ins with std::atomic for interpreter.cc #4976

Open: wants to merge 1 commit into base: main
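
For orientation, a minimal before/after sketch (illustrative only, not taken from the diff below) of the substitution this PR performs, assuming a plain integer counter:

#include <atomic>

int plain_counter = 0;              // old style: raw object + GCC/Clang built-in
std::atomic<int> atomic_counter{0}; // new style: the object itself is declared atomic

void bump() {
  // Before: compiler built-in, works on any suitably sized raw object.
  __atomic_fetch_add(&plain_counter, 1, __ATOMIC_RELAXED);
  // After: std::atomic member function with an explicit std::memory_order.
  atomic_counter.fetch_add(1, std::memory_order_relaxed);
}

Because the interpreter receives untyped buffers, the patch instead casts each element's address to std::atomic<T> * before calling these member functions (see the reviewer note on that cast further down).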
133 changes: 76 additions & 57 deletions python/src/interpreter.cc
@@ -1,3 +1,4 @@
#include <atomic>
#include <iostream>
#include <map>
#include <memory>
@@ -14,55 +15,47 @@ enum class MemSemantic { ACQUIRE_RELEASE, ACQUIRE, RELEASE, RELAXED };
enum class RMWOp { ADD, FADD, AND, OR, XOR, XCHG, MAX, MIN, UMIN, UMAX };

std::map<MemSemantic, int> mem_semantic_map = {
{MemSemantic::ACQUIRE_RELEASE, __ATOMIC_ACQ_REL},
{MemSemantic::ACQUIRE, __ATOMIC_ACQUIRE},
{MemSemantic::RELEASE, __ATOMIC_RELEASE},
{MemSemantic::RELAXED, __ATOMIC_RELAXED},
{MemSemantic::ACQUIRE_RELEASE, static_cast<int>(std::memory_order_acq_rel)},
{MemSemantic::ACQUIRE, static_cast<int>(std::memory_order_acquire)},
{MemSemantic::RELEASE, static_cast<int>(std::memory_order_release)},
{MemSemantic::RELAXED, static_cast<int>(std::memory_order_relaxed)},
};

// Use compiler builtin atomics instead of std::atomic which requires
// each variable to be declared as atomic.
// Currently work for clang and gcc.
template <bool is_min, typename T> T atomic_cmp(T *ptr, T val, int order) {
template <bool is_min, typename T>
T atomic_cmp(std::atomic<T> *ptr, T val, std::memory_order order) {
auto cmp = [](T old, T val) {
if constexpr (is_min) {
return old > val;
} else {
return old < val;
}
};

// First load
T old_val = __atomic_load_n(ptr, order);
T old_val = ptr->load(order);
while (cmp(old_val, val)) {
if (__atomic_compare_exchange(ptr, &old_val, &val, false, order, order)) {
if (ptr->compare_exchange_weak(old_val, val, order)) {
break;
}
}
return old_val;
}

template <typename T> T atomic_fadd(T *ptr, T val, int order) {
T old_val;
T new_val;
// First load
// Load ptr as if uint32_t or uint64_t and then memcpy to T
if constexpr (sizeof(T) == 4) {
uint32_t tmp = __atomic_load_n(reinterpret_cast<uint32_t *>(ptr), order);
std::memcpy(&old_val, &tmp, sizeof(T));
} else if constexpr (sizeof(T) == 8) {
uint64_t tmp = __atomic_load_n(reinterpret_cast<uint64_t *>(ptr), order);
std::memcpy(&old_val, &tmp, sizeof(T));
} else {
throw std::invalid_argument("Unsupported data type");
}
while (true) {
new_val = old_val + val;
if (__atomic_compare_exchange(ptr, &old_val, &new_val, false, order,
order)) {
break;
}
}
return old_val;
template <typename T>
T atomic_fadd(std::atomic<T> *loc, T value, std::memory_order order) {
static_assert(std::is_floating_point<T>::value,
"T must be a floating-point type");

T old_value = loc->load(order);
T new_value;
do {
new_value = old_value + value;
} while (!loc->compare_exchange_weak(old_value, new_value, order));

return old_value;
}

class AtomicOp {
@@ -95,13 +88,15 @@ template <typename DType> class AtomicRMWOpBase : public AtomicOp {
protected:
void applyAt(void *loc, size_t i) override final {
if (mask[i]) {
std::atomic<DType> *atomic_ptr = static_cast<std::atomic<DType> *>(loc);
Contributor review comment: While this usually works in practice, it's also undefined behavior, which isn't great. C++20 has std::atomic_ref which would fix this issue, but we don't compile with C++20 yet.
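
For reference, a hedged sketch of the C++20 std::atomic_ref alternative mentioned above (an assumption about a future build mode, not something this PR can use yet); it operates on the plain object without reinterpreting its address:

#include <atomic>

// Requires -std=c++20 and that *loc is suitably aligned
// (see std::atomic_ref<DType>::required_alignment).
template <typename DType>
DType fetch_add_via_ref(DType *loc, DType value, std::memory_order order) {
  std::atomic_ref<DType> ref(*loc);   // wraps the existing non-atomic object
  return ref.fetch_add(value, order); // integral or floating-point DType
}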

*(static_cast<DType *>(ret) + i) =
applyAtMasked(static_cast<DType *>(loc),
*(static_cast<const DType *>(val) + i), order);
applyAtMasked(atomic_ptr, *(static_cast<const DType *>(val) + i),
std::memory_order(order));
}
}

virtual DType applyAtMasked(DType *loc, const DType value, int order) = 0;
virtual DType applyAtMasked(std::atomic<DType> *loc, const DType value,
std::memory_order order) = 0;

const void *val;
void *ret;
@@ -121,8 +116,9 @@ class AtomicRMWOp<DType, Op, std::enable_if_t<Op == RMWOp::ADD>>
using AtomicRMWOpBase<DType>::AtomicRMWOpBase;

protected:
DType applyAtMasked(DType *loc, const DType value, int order) override {
return __atomic_fetch_add(loc, value, order);
DType applyAtMasked(std::atomic<DType> *loc, const DType value,
std::memory_order order) override {
return loc->fetch_add(value, order);
}
};

@@ -133,7 +129,9 @@ class AtomicRMWOp<DType, Op, std::enable_if_t<Op == RMWOp::FADD>>
using AtomicRMWOpBase<DType>::AtomicRMWOpBase;

protected:
DType applyAtMasked(DType *loc, const DType value, int order) override {
DType applyAtMasked(std::atomic<DType> *loc, const DType value,
std::memory_order order) override {

return atomic_fadd(loc, value, order);
}
};
@@ -145,8 +143,9 @@ class AtomicRMWOp<DType, Op, std::enable_if_t<Op == RMWOp::AND>>
using AtomicRMWOpBase<DType>::AtomicRMWOpBase;

protected:
DType applyAtMasked(DType *loc, const DType value, int order) override {
return __atomic_fetch_and(loc, value, order);
DType applyAtMasked(std::atomic<DType> *loc, const DType value,
std::memory_order order) override {
return loc->fetch_and(value, order);
}
};

@@ -157,8 +156,9 @@ class AtomicRMWOp<DType, Op, std::enable_if_t<Op == RMWOp::OR>>
using AtomicRMWOpBase<DType>::AtomicRMWOpBase;

protected:
DType applyAtMasked(DType *loc, const DType value, int order) override {
return __atomic_fetch_or(loc, value, order);
DType applyAtMasked(std::atomic<DType> *loc, const DType value,
std::memory_order order) override {
return loc->fetch_or(value, order);
}
};

@@ -169,8 +169,9 @@ class AtomicRMWOp<DType, Op, std::enable_if_t<Op == RMWOp::XOR>>
using AtomicRMWOpBase<DType>::AtomicRMWOpBase;

protected:
DType applyAtMasked(DType *loc, const DType value, int order) override {
return __atomic_fetch_xor(loc, value, order);
DType applyAtMasked(std::atomic<DType> *loc, const DType value,
std::memory_order order) override {
return loc->fetch_xor(value, order);
}
};

@@ -182,7 +183,8 @@ class AtomicRMWOp<DType, Op,
using AtomicRMWOpBase<DType>::AtomicRMWOpBase;

protected:
DType applyAtMasked(DType *loc, const DType value, int order) override {
DType applyAtMasked(std::atomic<DType> *loc, const DType value,
std::memory_order order) override {
return atomic_cmp</*is_min=*/false>(loc, value, order);
}
};
@@ -195,7 +197,8 @@ class AtomicRMWOp<DType, Op,
using AtomicRMWOpBase<DType>::AtomicRMWOpBase;

protected:
DType applyAtMasked(DType *loc, const DType value, int order) override {
DType applyAtMasked(std::atomic<DType> *loc, const DType value,
std::memory_order order) override {
return atomic_cmp</*is_min=*/true>(loc, value, order);
}
};
@@ -207,8 +210,9 @@ class AtomicRMWOp<DType, Op, std::enable_if_t<Op == RMWOp::XCHG>>
using AtomicRMWOpBase<DType>::AtomicRMWOpBase;

protected:
DType applyAtMasked(DType *loc, const DType value, int order) override {
return __atomic_exchange_n(loc, value, order);
DType applyAtMasked(std::atomic<DType> *loc, const DType value,
std::memory_order order) override {
return loc->exchange(value, order);
}
};

@@ -224,25 +228,40 @@ class AtomicCASOp : public AtomicOp {
// Atomic operations perform bitwise comparison, so it's safe to
// use number of bytes (itemsize) to determine the type of pointers
if (itemsize == 1) {
std::atomic<uint8_t> *atomic_loc =
reinterpret_cast<std::atomic<uint8_t> *>(loc);
uint8_t desired_val = *(static_cast<const uint8_t *>(desired) + i);
__atomic_compare_exchange_n(static_cast<uint8_t *>(loc),
static_cast<uint8_t *>(expected) + i,
desired_val, false, order, order);
uint8_t *expected_uint = static_cast<uint8_t *>(expected);
// Perform the compare and exchange operation
atomic_loc->compare_exchange_strong(*(expected_uint + i), desired_val,
std::memory_order(order));

} else if (itemsize == 2) {
std::atomic<uint16_t> *atomic_loc =
reinterpret_cast<std::atomic<uint16_t> *>(loc);
uint16_t desired_val = *(static_cast<const uint16_t *>(desired) + i);
__atomic_compare_exchange_n(static_cast<uint16_t *>(loc),
static_cast<uint16_t *>(expected) + i,
desired_val, false, order, order);
uint16_t *expected_uint = static_cast<uint16_t *>(expected);
atomic_loc->compare_exchange_strong(*(expected_uint + i), desired_val,
std::memory_order(order));
} else if (itemsize == 4) {
std::atomic<uint32_t> *atomic_loc =
reinterpret_cast<std::atomic<uint32_t> *>(loc);
uint32_t desired_val = *(static_cast<const uint32_t *>(desired) + i);
__atomic_compare_exchange_n(static_cast<uint32_t *>(loc),
static_cast<uint32_t *>(expected) + i,
desired_val, false, order, order);
uint32_t *expected_uint = static_cast<uint32_t *>(expected);
atomic_loc->compare_exchange_strong(*(expected_uint + i), desired_val,
std::memory_order(order));
} else if (itemsize == 8) {
uint64_t desired_val = *(static_cast<const uint64_t *>(desired) + i);
__atomic_compare_exchange_n(static_cast<uint64_t *>(loc),
static_cast<uint64_t *>(expected) + i,
desired_val, false, order, order);
std::atomic<uint64_t> *atomic_loc =
reinterpret_cast<std::atomic<uint64_t> *>(loc);
uint64_t *expected_uint = static_cast<uint64_t *>(expected);
atomic_loc->compare_exchange_strong(*(expected_uint + i), desired_val,
std::memory_order(order));

} else {
// The ‘__atomic’ builtins can be used with any integral scalar or pointer
// type that is 1, 2, 4, or 8 bytes in length. 16-byte integral types are
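
As a usage note, the compare_exchange_weak loop used by atomic_fadd above is the general pattern for any read-modify-write that std::atomic does not provide directly. A self-contained sketch (assumed names, not part of the patch), using the single-order overload so the failure ordering is derived automatically:

#include <atomic>
#include <cstdio>

// CAS loop: reload on failure and retry until old + v is published atomically.
float fetch_add_float(std::atomic<float> *loc, float v, std::memory_order order) {
  float old_val = loc->load(order);
  while (!loc->compare_exchange_weak(old_val, old_val + v, order)) {
    // old_val now holds the freshly observed value; the next iteration retries with it.
  }
  return old_val;
}

int main() {
  std::atomic<float> acc{0.0f};
  fetch_add_float(&acc, 1.5f, std::memory_order_relaxed);
  std::printf("%f\n", static_cast<double>(acc.load()));
}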