Skip to content

Commit

Permalink
Merge pull request numpy#25247 from Mousius/highway-vqsort-sve
Browse files Browse the repository at this point in the history
ENH: Enable SVE detection for Highway VQSort
  • Loading branch information
seiko2plus authored Dec 11, 2023
2 parents 35c4319 + 682d33c commit 0be4154
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 4 deletions.
9 changes: 8 additions & 1 deletion meson_cpu/arm/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,15 @@ ASIMDFHM = mod_features.new(
args: {'val': '-march=armv8.2-a+fp16fml', 'match': '-march=.*', 'mfilter': '\+.*'},
test_code: files(source_root + '/numpy/distutils/checks/cpu_asimdfhm.c')[0]
)
## Scalable Vector Extensions (SVE)
# Declares the 'SVE' feature, which implies ASIMDHP. Its compiler argument
# is '-march=armv8.2-a+sve' and its test source is checks/cpu_sve.c.
# NOTE(review): the meaning of the positional value 8 and of the
# 'match'/'mfilter' patterns is defined by mod_features and is not visible
# here; confirm against that module before changing them.
SVE = mod_features.new(
'SVE', 8, implies: ASIMDHP,
args: {'val': '-march=armv8.2-a+sve', 'match': '-march=.*', 'mfilter': '\+.*'},
test_code: files(source_root + '/numpy/distutils/checks/cpu_sve.c')[0]
)
# TODO: Add support for MSVC
ARM_FEATURES = {
'NEON': NEON, 'NEON_FP16': NEON_FP16, 'NEON_VFPV4': NEON_VFPV4,
'ASIMD': ASIMD, 'ASIMDHP': ASIMDHP, 'ASIMDFHM': ASIMDFHM
'ASIMD': ASIMD, 'ASIMDHP': ASIMDHP, 'ASIMDFHM': ASIMDFHM,
'SVE': SVE
}
2 changes: 1 addition & 1 deletion numpy/_core/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,7 @@ foreach gen_mtargets : [
'highway_qsort.dispatch.h',
'src/npysort/highway_qsort.dispatch.cpp',
[
ASIMD,
SVE, ASIMD,
]
],
]
Expand Down
7 changes: 6 additions & 1 deletion numpy/_core/src/common/npy_cpu_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ static struct {
{NPY_CPU_FEATURE_FPHP, "FPHP"},
{NPY_CPU_FEATURE_ASIMDHP, "ASIMDHP"},
{NPY_CPU_FEATURE_ASIMDDP, "ASIMDDP"},
{NPY_CPU_FEATURE_ASIMDFHM, "ASIMDFHM"}};
{NPY_CPU_FEATURE_ASIMDFHM, "ASIMDFHM"},
{NPY_CPU_FEATURE_SVE, "SVE"}};


NPY_VISIBILITY_HIDDEN PyObject *
Expand Down Expand Up @@ -760,6 +761,7 @@ npy__cpu_init_features_linux(void)
npy__cpu_have[NPY_CPU_FEATURE_ASIMDHP] = (hwcap & NPY__HWCAP_ASIMDHP) != 0;
npy__cpu_have[NPY_CPU_FEATURE_ASIMDDP] = (hwcap & NPY__HWCAP_ASIMDDP) != 0;
npy__cpu_have[NPY_CPU_FEATURE_ASIMDFHM] = (hwcap & NPY__HWCAP_ASIMDFHM) != 0;
npy__cpu_have[NPY_CPU_FEATURE_SVE] = (hwcap & NPY__HWCAP_SVE) != 0;
npy__cpu_init_features_arm8();
} else {
npy__cpu_have[NPY_CPU_FEATURE_NEON] = (hwcap & NPY__HWCAP_NEON) != 0;
Expand Down Expand Up @@ -794,6 +796,9 @@ npy__cpu_init_features(void)
#if defined(NPY_HAVE_ASIMDFHM) || defined(__ARM_FEATURE_FP16FML)
npy__cpu_have[NPY_CPU_FEATURE_ASIMDFHM] = 1;
#endif
#if defined(NPY_HAVE_SVE) || defined(__ARM_FEATURE_SVE)
npy__cpu_have[NPY_CPU_FEATURE_SVE] = 1;
#endif
npy__cpu_init_features_arm8();
#else
#if defined(NPY_HAVE_NEON) || defined(__ARM_NEON__)
Expand Down
2 changes: 2 additions & 0 deletions numpy/_core/src/common/npy_cpu_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ enum npy_cpu_features
NPY_CPU_FEATURE_ASIMDDP = 306,
// ARMv8.2 single&half-precision multiply
NPY_CPU_FEATURE_ASIMDFHM = 307,
// Scalable Vector Extensions (SVE)
NPY_CPU_FEATURE_SVE = 308,

// IBM/ZARCH
NPY_CPU_FEATURE_VX = 350,
Expand Down
1 change: 1 addition & 0 deletions numpy/_core/src/common/npy_cpuinfo_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
#define NPY__HWCAP_FPHP (1 << 9)
#define NPY__HWCAP_ASIMDHP (1 << 10)
#define NPY__HWCAP_ASIMDDP (1 << 20)
#define NPY__HWCAP_SVE (1 << 22)
#define NPY__HWCAP_ASIMDFHM (1 << 23)
/*
* Get the size of a file by reading it until the end. This is needed
Expand Down
2 changes: 1 addition & 1 deletion numpy/_core/tests/test_cpu_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def load_flags(self):
@pytest.mark.skipif(not is_linux or not is_arm, reason="Only for Linux and ARM")
class Test_ARM_Features(AbstractTest):
features = [
"NEON", "ASIMD", "FPHP", "ASIMDHP", "ASIMDDP", "ASIMDFHM"
"SVE", "NEON", "ASIMD", "FPHP", "ASIMDHP", "ASIMDDP", "ASIMDFHM"
]
features_groups = dict(
NEON_FP16 = ["NEON", "HALF"],
Expand Down
14 changes: 14 additions & 0 deletions numpy/distutils/checks/cpu_sve.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#include <arm_sve.h>

/*
 * Combine two SVE vectors of 64-bit integers into a single scalar.
 *
 * An all-true 64-bit predicate is built, `svmla_z` is applied to (a, a, b)
 * under that predicate, and the resulting vector is reduced with `svaddv`.
 * The reduction result is returned as `int` (implicitly converted).
 *
 * The overloaded (type-generic) intrinsic names are kept on purpose so the
 * translation unit exercises the same SVE ACLE entry points as before.
 */
int accumulate(svint64_t a, svint64_t b) {
    svbool_t all_lanes = svptrue_b64();
    svint64_t fused = svmla_z(all_lanes, a, a, b);
    return svaddv(all_lanes, fused);
}

/*
 * Entry point of the SVE probe program.
 *
 * Broadcasts the constants 1 and 2 into two SVE vectors of 64-bit integers
 * and returns whatever `accumulate` computes from them. The program exists
 * to exercise SVE intrinsics; no predicate is needed here because
 * `accumulate` builds its own.
 */
int main(void)
{
    svint64_t a = svdup_s64(1);
    svint64_t b = svdup_s64(2);
    return accumulate(a, b);
}

0 comments on commit 0be4154

Please sign in to comment.